From f10d84d05eb87468771de1b4e88fe1cb66dbee21 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Thu, 11 Dec 2025 12:19:22 -0500 Subject: [PATCH] move setup_db.sh to scripts, remove test dir (#12273) --- .flake8 | 1 - .../setup-postgres-windows/setup_db.sh | 2 +- .github/workflows/main.yml | 4 +- .../structured-logging-schema-check.yml | 2 +- .github/workflows/test-repeater.yml | 2 +- CONTRIBUTING.md | 2 +- core/hatch.toml | 2 +- {test => scripts}/setup_db.sh | 0 test/__init__.py | 0 test/interop/log_parsing/Cargo.lock | 204 ------------ test/interop/log_parsing/Cargo.toml | 10 - test/interop/log_parsing/src/main.rs | 299 ------------------ 12 files changed, 7 insertions(+), 521 deletions(-) rename {test => scripts}/setup_db.sh (100%) delete mode 100644 test/__init__.py delete mode 100644 test/interop/log_parsing/Cargo.lock delete mode 100644 test/interop/log_parsing/Cargo.toml delete mode 100644 test/interop/log_parsing/src/main.rs diff --git a/.flake8 b/.flake8 index 084d3c016..accf60d34 100644 --- a/.flake8 +++ b/.flake8 @@ -10,6 +10,5 @@ ignore = E704 # makes Flake8 work like black E741 E501 # long line checking is done in black -exclude = test/ per-file-ignores = */__init__.py: F401 diff --git a/.github/actions/setup-postgres-windows/setup_db.sh b/.github/actions/setup-postgres-windows/setup_db.sh index ee75ef504..23fa267c7 120000 --- a/.github/actions/setup-postgres-windows/setup_db.sh +++ b/.github/actions/setup-postgres-windows/setup_db.sh @@ -1 +1 @@ -../../../test/setup_db.sh \ No newline at end of file +../../../scripts/setup_db.sh \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d10ca94cb..39e4f9dc7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -211,7 +211,7 @@ jobs: - name: Run postgres setup script run: | - ./test/setup_db.sh + ./scripts/setup_db.sh env: PGHOST: localhost PGPORT: 5432 @@ -292,7 +292,7 @@ jobs: with: timeout_minutes: 10 max_attempts: 3 - command: ./test/setup_db.sh + command: ./scripts/setup_db.sh - name: Set up postgres (windows) if: runner.os == 'Windows' diff --git a/.github/workflows/structured-logging-schema-check.yml b/.github/workflows/structured-logging-schema-check.yml index b714f9008..d83ed401a 100644 --- a/.github/workflows/structured-logging-schema-check.yml +++ b/.github/workflows/structured-logging-schema-check.yml @@ -107,7 +107,7 @@ jobs: - name: Run postgres setup script run: | - ./test/setup_db.sh + ./scripts/setup_db.sh env: PGHOST: localhost PGPORT: 5432 diff --git a/.github/workflows/test-repeater.yml b/.github/workflows/test-repeater.yml index 8917e5b56..10facaef7 100644 --- a/.github/workflows/test-repeater.yml +++ b/.github/workflows/test-repeater.yml @@ -111,7 +111,7 @@ jobs: with: timeout_minutes: 10 max_attempts: 3 - command: ./test/setup_db.sh + command: ./scripts/setup_db.sh - name: "Set up postgres (windows)" if: inputs.os == 'windows-latest' diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2cb390af9..69d9ce66d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -168,7 +168,7 @@ Alternatively, you can run the setup commands directly: ```sh docker-compose up -d database -PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh +PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash scripts/setup_db.sh ``` ### Test commands diff --git a/core/hatch.toml b/core/hatch.toml index e1d0bf368..6d61ec6b1 100644 --- a/core/hatch.toml +++ b/core/hatch.toml @@ -118,7 +118,7 @@ test = [ # Database setup setup-db = [ "docker compose up -d database", - "bash ../test/setup_db.sh", + "bash ../scripts/setup_db.sh", ] # Utility commands diff --git a/test/setup_db.sh b/scripts/setup_db.sh similarity index 100% rename from test/setup_db.sh rename to scripts/setup_db.sh diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/interop/log_parsing/Cargo.lock b/test/interop/log_parsing/Cargo.lock deleted file mode 100644 index 0ffcc7e09..000000000 --- a/test/interop/log_parsing/Cargo.lock +++ /dev/null @@ -1,204 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "serde", - "time", - "winapi", -] - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - -[[package]] -name = "libc" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119" - -[[package]] -name = "log_parsing" -version = "0.1.0" -dependencies = [ - "chrono", - "serde", - "serde_json", - "walkdir", -] - -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" -dependencies = [ - "autocfg", -] - -[[package]] -name = "proc-macro2" -version = "1.0.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "quote" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "ryu" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.130" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.130" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "syn" -version = "1.0.82" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "time" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" -dependencies = [ - "libc", - "wasi", - "winapi", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "walkdir" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" -dependencies = [ - "same-file", - "winapi", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/test/interop/log_parsing/Cargo.toml b/test/interop/log_parsing/Cargo.toml deleted file mode 100644 index 688f00500..000000000 --- a/test/interop/log_parsing/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "log_parsing" -version = "0.1.0" -edition = "2018" - -[dependencies] -serde = { version = "1.0", features = ["derive"] } -serde_json = { version = "1.0" } -chrono = { version = "0.4", features = ["serde"] } -walkdir = "2" diff --git a/test/interop/log_parsing/src/main.rs b/test/interop/log_parsing/src/main.rs deleted file mode 100644 index 5764a5ed2..000000000 --- a/test/interop/log_parsing/src/main.rs +++ /dev/null @@ -1,299 +0,0 @@ -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use std::env; -use std::error::Error; -use std::fs::File; -use std::io::{self, BufRead}; -use walkdir::WalkDir; - -// Applies schema tests to file input -// if these fail, we either have a problem in dbt that needs to be resolved -// or we have changed our interface and the log_version should be bumped in dbt, -// modeled appropriately here, and publish new docs for the new log_version. -fn main() -> Result<(), Box> { - let log_name = "dbt.log"; - let path = env::var("LOG_DIR") - .expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`"); - - println!("Looking for files named `{}` in {}", log_name, path); - let lines: Vec = get_input(&path, log_name)?; - println!("collected {} log lines.", lines.len()); - - println!(""); - - println!("testing type-level schema compliance by deserializing each line..."); - let log_lines: Vec = deserialized_input(&lines).map_err(|e| { - format!( - "schema test failure: json doesn't match type definition\n{}", - e - ) - })?; - println!("Done."); - - println!(""); - println!( - "because we skip non-json log lines, there are {} collected values to test.", - log_lines.len() - ); - println!(""); - - // make sure when we read a string in then output it back to a string the two strings - // contain all the same key-value pairs. - println!("testing serialization loop to make sure all key-value pairs are accounted for"); - test_deserialize_serialize_is_unchanged(&lines); - println!("Done."); - - println!(""); - - // make sure each log_line contains the values we expect - println!("testing that the field values in each log line are expected"); - for log_line in log_lines { - log_line.value_test() - } - println!("Done."); - - Ok(()) -} - -// each nested type of LogLine should define its own value_test function -// that asserts values are within an expected set of values when possible. -trait ValueTest { - fn value_test(&self) -> (); -} - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] -struct LogLine { - log_version: isize, - r#type: String, - code: String, - #[serde(with = "custom_date_format")] - ts: DateTime, - pid: isize, - msg: String, - level: String, - invocation_id: String, - thread_name: String, - data: serde_json::Value, // TODO be more specific -} - -impl ValueTest for LogLine { - fn value_test(&self) { - assert_eq!( - self.log_version, 2, - "The log version changed. Be sure this was intentional." - ); - - assert_eq!( - self.r#type, - "log_line".to_owned(), - "The type value has changed. If this is intentional, bump the log version" - ); - - assert!( - ["debug", "info", "warn", "error"] - .iter() - .any(|level| **level == self.level), - "log level had unexpected value {}", - self.level - ); - } -} - -// logs output timestamps like this: "2021-11-30T12:31:04.312814Z" -// which is so close to the default except for the decimal. -// this requires handling the date with "%Y-%m-%dT%H:%M:%S%.6f" which requires this -// boilerplate-looking module. -mod custom_date_format { - use chrono::{DateTime, NaiveDateTime, Utc}; - use serde::{self, Deserialize, Deserializer, Serializer}; - - const FORMAT: &'static str = "%Y-%m-%dT%H:%M:%S%.6fZ"; - - pub fn serialize(date: &DateTime, serializer: S) -> Result - where - S: Serializer, - { - let s = format!("{}", date.format(FORMAT)); - serializer.serialize_str(&s) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - Ok(DateTime::::from_utc( - NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?, - Utc, - )) - } -} - -// finds all files in any subdirectory of this path with this name. returns the contents -// of each file line by line as one continuous structure. No distinction between files. -fn get_input(path: &str, file_name: &str) -> Result, String> { - WalkDir::new(path) - .follow_links(true) - .into_iter() - // filters out all the exceptions encountered on this walk silently - .filter_map(|e| e.ok()) - // walks through each file and returns the contents if the filename matches - .filter_map(|e| { - let f_name = e.file_name().to_string_lossy(); - if f_name.ends_with(file_name) { - let contents = File::open(e.path()) - .map_err(|e| { - format!( - "Something went wrong opening the log file {}\n{}", - f_name, e - ) - }) - .and_then(|file| { - io::BufReader::new(file) - .lines() - .map(|l| { - l.map_err(|e| { - format!( - "Something went wrong reading lines of the log file {}\n{}", - f_name, e - ) - }) - }) - .collect::, String>>() - }); - - Some(contents) - } else { - None - } - }) - .collect::>, String>>() - .map(|vv| vv.concat()) -} - -// attemps to deserialize the strings into LogLines. If the string isn't valid -// json it skips it instead of failing. This is so that any tests that generate -// non-json logs won't break the schema test. -fn deserialized_input(log_lines: &[String]) -> serde_json::Result> { - log_lines - .into_iter() - // if the log line isn't valid json format, toss it - .filter(|log_line| serde_json::from_str::(log_line).is_ok()) - // attempt to deserialize into our LogLine type - .map(|log_line| serde_json::from_str::(log_line)) - .collect() -} - -// Take a json string, deserialize it to a known value, and a generic json value. -// return the two json values so they can be compared. -// this helps to check if the deserialize-serialize loop for the known value drops necessary information. -// -// This function is used as a helper to check every json logline that dbt outputs for expected values. -// In practice, this often returns an Err value if the logs have unexpected non-json values such as logged -// exceptions, warnings, or printed statements. -fn deserialize_twice<'a, T: Serialize + Deserialize<'a>>( - json_str: &'a str, -) -> Result<(serde_json::Value, serde_json::Value), String> { - // deserialize the string into a JSON value with no knowledge of T's structure - let deserialized_json = - serde_json::from_str::(json_str).map_err(|_| json_str)?; - - // deserialize the string into a T - let deserialized_t = serde_json::from_str::<'a, T>(json_str).map_err(|_| json_str)?; - - // serialize the T value into a string again - let serialized_t = serde_json::to_string(&deserialized_t).map_err(|_| json_str)?; - - // deserialize the string into a JSON value - let deserialized_t_json = - serde_json::from_str::(&serialized_t).map_err(|_| json_str)?; - - Ok((deserialized_json, deserialized_t_json)) -} - -// This is only used to make more useful error messages and avoid assert statements that say that two massive -// json objects are not the same. this will dig into the values to find an inner json value where they differ. -fn compare_json(x: &serde_json::Value, y: &serde_json::Value) -> Result<(), serde_json::Value> { - if x == y { - return Ok(()); - } - - match (x, y) { - // check for object key mismatches - (serde_json::Value::Object(mx), serde_json::Value::Object(my)) => { - // check if the second map is missing keys from the first - for (xk, xv) in mx { - if !my.contains_key(xk) { - let mut m_err = serde_json::Map::new(); - m_err.insert(xk.clone(), xv.clone()); - return Err(serde_json::Value::Object(m_err)); - } - } - - // check if the first map is missing keys from the second - for (yk, yv) in my { - if !mx.contains_key(yk) { - let mut m_err = serde_json::Map::new(); - m_err.insert(yk.clone(), yv.clone()); - return Err(serde_json::Value::Object(m_err)); - } - } - - // all the keys are the same, so compare each value recursively. - for k in mx.keys() { - // unwrapping (which panics) since we know all these keys exist in both maps. - let xv = mx.get(k).unwrap(); - let yv = my.get(k).unwrap(); - - compare_json(xv, yv)?; - } - - // only reaches here if all the keys are the same and all of the values are the same - // and the top-level equality check on the map failed. - Ok(()) - } - - // must be a non-object json value. since there are no keys to specify, return the first value. - (x, _) => Err(x.clone()), - } -} - -// make sure when we read a string in then output it back to a string the two strings -// contain all the same key-value pairs. -fn test_deserialize_serialize_is_unchanged(lines: &[String]) { - for line in lines { - match deserialize_twice::(line) { - // error if there are not two values to compare - Err(log_line) => assert!(false, "Logline cannot be deserialized into a json LogLine twice for value comparison:\n{}\n", log_line), - // if there are two values to compare, assert they are the same or find the key-value pair where they differ. - Ok((x, y)) => match compare_json(&x, &y) { - Err(json_value) => assert!(false, "LogLine values were inconsistent.\nSpecific difference:\n{}\nWhole log line:\n{}", json_value, line), - Ok(()) => () - } - } - } -} - -#[cfg(test)] -mod tests { - use crate::*; - - const LOG_LINE: &str = r#"{"code": "Z023", "data": {"stats": {"error": 0, "pass": 3, "skip": 0, "total": 3, "warn": 0}}, "invocation_id": "f1e1557c-4f9d-4053-bb50-572cbbf2ca64", "level": "info", "log_version": 2, "msg": "Done. PASS=3 WARN=0 ERROR=0 SKIP=0 TOTAL=3", "pid": 75854, "thread_name": "MainThread", "ts": "2021-12-03T01:32:38.334601Z", "type": "log_line"}"#; - - #[test] - fn test_basic_loop() { - assert!(deserialize_serialize_loop(&[LOG_LINE.to_owned()]).is_ok()) - } - - #[test] - fn test_values() { - assert!(deserialized_input(&[LOG_LINE.to_owned()]) - .map(|v| { v.into_iter().map(|ll| ll.value_test()) }) - .is_ok()) - } - - #[test] - fn test_values_loop() { - test_deserialize_serialize_is_unchanged(&[LOG_LINE.to_owned()]); - } -}