move setup_db.sh to scripts, remove test dir (#12273)

This commit is contained in:
Michelle Ark
2025-12-11 12:19:22 -05:00
committed by GitHub
parent 79a4c8969e
commit f10d84d05e
12 changed files with 7 additions and 521 deletions

View File

@@ -10,6 +10,5 @@ ignore =
E704 # makes Flake8 work like black
E741
E501 # long line checking is done in black
exclude = test/
per-file-ignores =
*/__init__.py: F401

View File

@@ -1 +1 @@
../../../test/setup_db.sh
../../../scripts/setup_db.sh

View File

@@ -211,7 +211,7 @@ jobs:
- name: Run postgres setup script
run: |
./test/setup_db.sh
./scripts/setup_db.sh
env:
PGHOST: localhost
PGPORT: 5432
@@ -292,7 +292,7 @@ jobs:
with:
timeout_minutes: 10
max_attempts: 3
command: ./test/setup_db.sh
command: ./scripts/setup_db.sh
- name: Set up postgres (windows)
if: runner.os == 'Windows'

View File

@@ -107,7 +107,7 @@ jobs:
- name: Run postgres setup script
run: |
./test/setup_db.sh
./scripts/setup_db.sh
env:
PGHOST: localhost
PGPORT: 5432

View File

@@ -111,7 +111,7 @@ jobs:
with:
timeout_minutes: 10
max_attempts: 3
command: ./test/setup_db.sh
command: ./scripts/setup_db.sh
- name: "Set up postgres (windows)"
if: inputs.os == 'windows-latest'

View File

@@ -168,7 +168,7 @@ Alternatively, you can run the setup commands directly:
```sh
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash scripts/setup_db.sh
```
### Test commands

View File

@@ -118,7 +118,7 @@ test = [
# Database setup
setup-db = [
"docker compose up -d database",
"bash ../test/setup_db.sh",
"bash ../scripts/setup_db.sh",
]
# Utility commands

View File

View File

@@ -1,204 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"serde",
"time",
"winapi",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "libc"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
[[package]]
name = "log_parsing"
version = "0.1.0"
dependencies = [
"chrono",
"serde",
"serde_json",
"walkdir",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "proc-macro2"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.130"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.130"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ffa0837f2dfa6fb90868c2b5468cad482e175f7dad97e7421951e663f2b527"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,10 +0,0 @@
[package]
name = "log_parsing"
version = "0.1.0"
edition = "2018"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0" }
chrono = { version = "0.4", features = ["serde"] }
walkdir = "2"

View File

@@ -1,299 +0,0 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::env;
use std::error::Error;
use std::fs::File;
use std::io::{self, BufRead};
use walkdir::WalkDir;
/// Applies schema tests to the dbt log files found under the directory named by `LOG_DIR`.
///
/// If these checks fail, we either have a problem in dbt that needs to be resolved,
/// or we have changed our interface, in which case the log_version should be bumped in dbt,
/// modeled appropriately here, and new docs published for the new log_version.
///
/// # Errors
/// Returns an error when the log files cannot be read or when a JSON line does not
/// match the `LogLine` type definition.
///
/// # Panics
/// Panics when `LOG_DIR` is not set, and when any round-trip or value assertion fails.
fn main() -> Result<(), Box<dyn Error>> {
    const LOG_NAME: &str = "dbt.log";
    let log_dir = env::var("LOG_DIR")
        .expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`");

    println!("Looking for files named `{}` in {}", LOG_NAME, log_dir);
    let raw_lines: Vec<String> = get_input(&log_dir, LOG_NAME)?;
    println!("collected {} log lines.", raw_lines.len());
    println!();

    println!("testing type-level schema compliance by deserializing each line...");
    let parsed_lines: Vec<LogLine> = match deserialized_input(&raw_lines) {
        Ok(parsed) => parsed,
        Err(e) => {
            let message = format!(
                "schema test failure: json doesn't match type definition\n{}",
                e
            );
            return Err(message.into());
        }
    };
    println!("Done.");
    println!();
    println!(
        "because we skip non-json log lines, there are {} collected values to test.",
        parsed_lines.len()
    );
    println!();

    // Reading a string in and writing it back out must yield the same key-value pairs.
    println!("testing serialization loop to make sure all key-value pairs are accounted for");
    test_deserialize_serialize_is_unchanged(&raw_lines);
    println!("Done.");
    println!();

    // Every parsed line must carry the values we expect.
    println!("testing that the field values in each log line are expected");
    for parsed in &parsed_lines {
        parsed.value_test();
    }
    println!("Done.");

    Ok(())
}
/// Value-level checks for log types.
///
/// Each nested type of `LogLine` should define its own `value_test` function
/// that asserts values are within an expected set of values when possible.
trait ValueTest {
    /// Asserts that this value's fields hold expected values.
    fn value_test(&self);
}
/// One structured (JSON) log line, modeled field-for-field so that serde can
/// deserialize it and serialize it back for comparison.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
struct LogLine {
// version of the log schema; `value_test` asserts it equals 2
log_version: isize,
// raw identifier because `type` is a Rust keyword; `value_test` asserts it equals "log_line"
r#type: String,
code: String,
// read and written through the `custom_date_format` module instead of chrono's default format
#[serde(with = "custom_date_format")]
ts: DateTime<Utc>,
pid: isize,
msg: String,
// `value_test` asserts this is one of "debug", "info", "warn", "error"
level: String,
invocation_id: String,
thread_name: String,
// kept as an untyped JSON value, so its contents are not schema-checked here
data: serde_json::Value, // TODO be more specific
}
impl ValueTest for LogLine {
    /// Asserts the top-level fields of a log line hold the values this schema expects:
    /// `log_version` is 2, `type` is "log_line", and `level` is a known level name.
    ///
    /// # Panics
    /// Panics with a descriptive message on the first field that does not match.
    fn value_test(&self) {
        const KNOWN_LEVELS: [&str; 4] = ["debug", "info", "warn", "error"];

        assert_eq!(
            self.log_version, 2,
            "The log version changed. Be sure this was intentional."
        );
        assert_eq!(
            self.r#type, "log_line",
            "The type value has changed. If this is intentional, bump the log version"
        );
        assert!(
            KNOWN_LEVELS.contains(&self.level.as_str()),
            "log level had unexpected value {}",
            self.level
        );
    }
}
/// Serde adapter for the timestamp format used in the logs.
///
/// Logs output timestamps like this: "2021-11-30T12:31:04.312814Z",
/// which is close to the default format except for the six-digit fraction.
/// Handling it requires the explicit pattern "%Y-%m-%dT%H:%M:%S%.6fZ", hence this
/// boilerplate-looking module, referenced via `#[serde(with = "custom_date_format")]`.
mod custom_date_format {
    use chrono::{DateTime, NaiveDateTime, Utc};
    use serde::{self, Deserialize, Deserializer, Serializer};

    /// Pattern shared by both directions so a round trip reproduces the input text.
    const FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.6fZ";

    /// Writes `date` as a string formatted with `FORMAT`.
    pub fn serialize<S: Serializer>(
        date: &DateTime<Utc>,
        serializer: S,
    ) -> Result<S::Ok, S::Error> {
        let rendered = date.format(FORMAT).to_string();
        serializer.serialize_str(&rendered)
    }

    /// Reads a string and parses it with `FORMAT`, treating the result as UTC.
    /// Parse failures are surfaced as serde custom errors.
    pub fn deserialize<'de, D: Deserializer<'de>>(
        deserializer: D,
    ) -> Result<DateTime<Utc>, D::Error> {
        let raw = String::deserialize(deserializer)?;
        let naive = NaiveDateTime::parse_from_str(&raw, FORMAT).map_err(serde::de::Error::custom)?;
        Ok(DateTime::<Utc>::from_utc(naive, Utc))
    }
}
/// Collects the lines of every file under `path` (recursively, following links)
/// whose file name ends with `file_name`.
///
/// The lines of all matching files are returned as one continuous vector, with no
/// distinction between files. Entries the directory walk fails on are skipped silently.
///
/// # Errors
/// Returns a message naming the file when a matching file cannot be opened or read;
/// the first such failure stops the walk.
fn get_input(path: &str, file_name: &str) -> Result<Vec<String>, String> {
    let mut collected: Vec<String> = Vec::new();

    // walk errors are dropped on purpose; only readable entries are considered
    let entries = WalkDir::new(path)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok);

    for entry in entries {
        let name = entry.file_name().to_string_lossy();
        if !name.ends_with(file_name) {
            continue;
        }

        let file = File::open(entry.path()).map_err(|err| {
            format!(
                "Something went wrong opening the log file {}\n{}",
                name, err
            )
        })?;

        for line in io::BufReader::new(file).lines() {
            let line = line.map_err(|err| {
                format!(
                    "Something went wrong reading lines of the log file {}\n{}",
                    name, err
                )
            })?;
            collected.push(line);
        }
    }

    Ok(collected)
}
/// Attempts to deserialize the strings into `LogLine`s.
///
/// A string that isn't valid JSON is skipped instead of failing, so that any tests
/// that generate non-json logs won't break the schema test.
///
/// # Errors
/// Returns the serde_json error of the first line that is valid JSON but does not
/// deserialize into a `LogLine`.
fn deserialized_input(log_lines: &[String]) -> serde_json::Result<Vec<LogLine>> {
    let mut parsed = Vec::new();
    for raw in log_lines {
        // if the log line isn't valid json format, toss it
        if serde_json::from_str::<serde_json::Value>(raw).is_err() {
            continue;
        }
        // valid json must match our LogLine type, otherwise the whole call fails
        parsed.push(serde_json::from_str::<LogLine>(raw)?);
    }
    Ok(parsed)
}
// Take a json string, deserialize it to a known value, and a generic json value.
// return the two json values so they can be compared.
// this helps to check if the deserialize-serialize loop for the known value drops necessary information.
//
// This function is used as a helper to check every json logline that dbt outputs for expected values.
// In practice, this often returns an Err value if the logs have unexpected non-json values such as logged
// exceptions, warnings, or printed statements.
fn deserialize_twice<'a, T: Serialize + Deserialize<'a>>(
json_str: &'a str,
) -> Result<(serde_json::Value, serde_json::Value), String> {
// deserialize the string into a JSON value with no knowledge of T's structure
let deserialized_json =
serde_json::from_str::<serde_json::Value>(json_str).map_err(|_| json_str)?;
// deserialize the string into a T
let deserialized_t = serde_json::from_str::<'a, T>(json_str).map_err(|_| json_str)?;
// serialize the T value into a string again
let serialized_t = serde_json::to_string(&deserialized_t).map_err(|_| json_str)?;
// deserialize the string into a JSON value
let deserialized_t_json =
serde_json::from_str::<serde_json::Value>(&serialized_t).map_err(|_| json_str)?;
Ok((deserialized_json, deserialized_t_json))
}
// This is only used to make more useful error messages and avoid assert statements that say that two massive
// json objects are not the same. this will dig into the values to find an inner json value where they differ.
fn compare_json(x: &serde_json::Value, y: &serde_json::Value) -> Result<(), serde_json::Value> {
if x == y {
return Ok(());
}
match (x, y) {
// check for object key mismatches
(serde_json::Value::Object(mx), serde_json::Value::Object(my)) => {
// check if the second map is missing keys from the first
for (xk, xv) in mx {
if !my.contains_key(xk) {
let mut m_err = serde_json::Map::new();
m_err.insert(xk.clone(), xv.clone());
return Err(serde_json::Value::Object(m_err));
}
}
// check if the first map is missing keys from the second
for (yk, yv) in my {
if !mx.contains_key(yk) {
let mut m_err = serde_json::Map::new();
m_err.insert(yk.clone(), yv.clone());
return Err(serde_json::Value::Object(m_err));
}
}
// all the keys are the same, so compare each value recursively.
for k in mx.keys() {
// unwrapping (which panics) since we know all these keys exist in both maps.
let xv = mx.get(k).unwrap();
let yv = my.get(k).unwrap();
compare_json(xv, yv)?;
}
// only reaches here if all the keys are the same and all of the values are the same
// and the top-level equality check on the map failed.
Ok(())
}
// must be a non-object json value. since there are no keys to specify, return the first value.
(x, _) => Err(x.clone()),
}
}
/// Makes sure that when we read a string in and then output it back to a string,
/// the two strings contain all the same key-value pairs.
///
/// Every line is checked, so a line that cannot be deserialized into a `LogLine`
/// (including one that is not json at all) fails the check.
///
/// # Panics
/// Panics on the first line that cannot be deserialized twice, or whose round-tripped
/// json differs from the input; the message names the specific difference.
fn test_deserialize_serialize_is_unchanged(lines: &[String]) {
    for line in lines {
        // without two values there is nothing to compare
        let (original, round_tripped) = match deserialize_twice::<LogLine>(line) {
            Ok(pair) => pair,
            Err(log_line) => panic!("Logline cannot be deserialized into a json LogLine twice for value comparison:\n{}\n", log_line),
        };

        // the values must match; otherwise report the key-value pair where they differ
        if let Err(json_value) = compare_json(&original, &round_tripped) {
            panic!("LogLine values were inconsistent.\nSpecific difference:\n{}\nWhole log line:\n{}", json_value, line);
        }
    }
}
/// Unit tests that run the schema checks against one known-good sample log line.
#[cfg(test)]
mod tests {
    use super::*;

    /// A real-looking log line that satisfies every check in this file.
    const LOG_LINE: &str = r#"{"code": "Z023", "data": {"stats": {"error": 0, "pass": 3, "skip": 0, "total": 3, "warn": 0}}, "invocation_id": "f1e1557c-4f9d-4053-bb50-572cbbf2ca64", "level": "info", "log_version": 2, "msg": "Done. PASS=3 WARN=0 ERROR=0 SKIP=0 TOTAL=3", "pid": 75854, "thread_name": "MainThread", "ts": "2021-12-03T01:32:38.334601Z", "type": "log_line"}"#;

    /// The sample line survives the deserialize -> serialize -> deserialize loop.
    #[test]
    fn test_basic_loop() {
        // the original called `deserialize_serialize_loop`, which does not exist in this
        // file; `deserialize_twice` is the helper that performs that loop
        assert!(deserialize_twice::<LogLine>(LOG_LINE).is_ok())
    }

    /// The sample line deserializes into a `LogLine` whose field values pass `value_test`.
    #[test]
    fn test_values() {
        let log_lines = deserialized_input(&[LOG_LINE.to_owned()])
            .expect("the sample log line should deserialize into a LogLine");
        assert_eq!(log_lines.len(), 1, "the sample line must not be skipped");
        // iterate eagerly: a lazy `map` that is never consumed would not run the checks
        for log_line in &log_lines {
            log_line.value_test();
        }
    }

    /// The sample line round-trips with all of its key-value pairs intact.
    #[test]
    fn test_values_loop() {
        test_deserialize_serialize_is_unchanged(&[LOG_LINE.to_owned()]);
    }
}