Compare commits

...

1 Commit

Author          SHA1         Message                                               Date
Nathaniel May   cdb78d0270   Revert "Add Performance Regression Testing [Rust]"   2021-08-11 10:45:31 -04:00
27 changed files with 0 additions and 1326 deletions

View File

@@ -1,181 +0,0 @@
name: Performance Regression Testing
# Schedule triggers
on:
  # TODO this is just while developing
  pull_request:
    branches:
      - 'develop'
      - 'performance-regression-testing'
  schedule:
    # runs twice a day at 10:05am and 10:05pm
    - cron: '5 10,22 * * *'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  # checks fmt of runner code
  # purposefully not a dependency of any other job
  # will block merging, but not prevent developing
  fmt:
    name: Cargo fmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add rustfmt
      - uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path performance/runner/Cargo.toml --all -- --check

  # runs any tests associated with the runner
  # these tests make sure the runner logic is correct
  test-runner:
    name: Test Runner
    runs-on: ubuntu-latest
    env:
      # turns warnings into errors
      RUSTFLAGS: "-D warnings"
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: test
          args: --manifest-path performance/runner/Cargo.toml

  # build an optimized binary to be used as the runner in later steps
  build-runner:
    needs: [test-runner]
    name: Build Runner
    runs-on: ubuntu-latest
    env:
      RUSTFLAGS: "-D warnings"
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: build
          args: --release --manifest-path performance/runner/Cargo.toml
      - uses: actions/upload-artifact@v2
        with:
          name: runner
          path: performance/runner/target/release/runner

  # run the performance measurements on the current or default branch
  measure-dev:
    needs: [build-runner]
    name: Measure Dev Branch
    runs-on: ubuntu-latest
    steps:
      - name: checkout dev
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: '3.8'
      - name: install dbt
        run: pip install -r dev-requirements.txt -r editable-requirements.txt
      - name: install hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run
        run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/
      - uses: actions/upload-artifact@v2
        with:
          name: dev-results
          path: performance/results/

  # run the performance measurements on the release branch which we use
  # as a performance baseline. This part takes by far the longest, so
  # we do everything else first so the job can fail fast.
  # -----
  # we need to checkout dbt twice in this job: once for the baseline dbt
  # version, and once to get the latest regression testing projects,
  # metrics, and runner code from the develop or current branch so that
  # the calculations match for both versions of dbt we are comparing.
  measure-baseline:
    needs: [build-runner]
    name: Measure Baseline Branch
    runs-on: ubuntu-latest
    steps:
      - name: checkout latest
        uses: actions/checkout@v2
        with:
          ref: '0.20.latest'
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: '3.8'
      - name: move repo up a level
        run: mkdir ${{ github.workspace }}/../baseline/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline
      - name: "[debug] ls new dbt location"
        run: ls ${{ github.workspace }}/../baseline/dbt/
      # installation creates egg-links so we have to preserve source
      - name: install dbt from new location
        run: cd ${{ github.workspace }}/../baseline/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt
      # checkout the current branch to get all the target projects
      # this deletes the old checked out code which is why we had to copy before
      - name: checkout dev
        uses: actions/checkout@v2
      - name: install hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run runner
        run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/
      - uses: actions/upload-artifact@v2
        with:
          name: baseline-results
          path: performance/results/

  # detect regressions on the output generated from measuring
  # the two branches. Exits with a non-zero code if a regression is detected.
  calculate-regressions:
    needs: [measure-dev, measure-baseline]
    name: Compare Results
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: dev-results
      - uses: actions/download-artifact@v2
        with:
          name: baseline-results
      - name: "[debug] ls result files"
        run: ls
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run calculation
        run: ./runner calculate -r ./
      # always attempt to upload the results even if regressions were found
      - uses: actions/upload-artifact@v2
        if: ${{ always() }}
        with:
          name: final-calculations
          path: ./final_calculations.json

View File

@@ -22,7 +22,6 @@
- Fix for RPC requests that raise a RecursionError when serializing Undefined values as JSON ([#3464](https://github.com/dbt-labs/dbt/issues/3464), [#3687](https://github.com/dbt-labs/dbt/pull/3687))
### Under the hood
- Add performance regression testing [#3602](https://github.com/dbt-labs/dbt/pull/3602)
- Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468))
- Add optional `sslcert`, `sslkey`, and `sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473))
- Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536))

View File

@@ -1,18 +0,0 @@
# Performance Regression Testing
This directory includes dbt project setups to test against and a test runner, written in Rust, that runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually.
The GitHub workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards.
## Adding a new dbt project
Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests.
## Adding a new dbt command
In `runner/src/measure.rs::measure`, add a metric to the `metrics` Vec (see the sketch below). The GitHub Action handles compilation, so you don't need the Rust toolchain installed locally.
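A minimal sketch of such an addition, matching the `Metric` struct in `measure.rs`. The `compile` entry here is illustrative; `parse` is the only metric in the suite today:

```rust
// In runner/src/measure.rs::measure
let metrics: Vec<Metric> = vec![
    Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    },
    // hypothetical new metric: measures `dbt compile` the same way
    Metric {
        name: "compile",
        prepare: "rm -rf target/",
        cmd: "dbt compile --no-version-check",
    },
];
```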
## Future work
- add more projects to test different configurations that have been known bottlenecks
- add more dbt commands to measure
- possibly use the uploaded json artifacts to store these results so they can be graphed over time
- read new metrics from a file so no one has to edit Rust source to add them to the suite (a sketch follows this list)
- instead of building the Rust runner every time, publish and pull down the latest version
- instead of manually setting the baseline version of dbt to test, pull down the latest stable version as the baseline
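One possible shape for the metrics-from-a-file idea, assuming a JSON config and owned strings; the `MetricConfig` name and the `metrics.json` path are hypothetical, not part of the current runner:

```rust
use serde::Deserialize;

// Owned-string twin of `Metric`, so it can be deserialized from a file.
#[derive(Debug, Deserialize)]
struct MetricConfig {
    name: String,
    prepare: String,
    cmd: String,
}

// Read a list of metrics from e.g. performance/metrics.json
fn read_metrics(path: &std::path::Path) -> Result<Vec<MetricConfig>, Box<dyn std::error::Error>> {
    let contents = std::fs::read_to_string(path)?;
    Ok(serde_json::from_str(&contents)?)
}
```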

View File

@@ -1 +0,0 @@
id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b

View File

@@ -1,12 +0,0 @@
default:
  target: dev
  outputs:
    dev:
      type: postgres
      host: localhost
      user: dummy
      password: dummy_password
      port: 5432
      dbname: dummy
      schema: dummy
      threads: 4

View File

@@ -1,38 +0,0 @@
# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'my_new_package'
version: 1.0.0
config-version: 2
# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'default'
# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_modules"
# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
# In this example config, we tell dbt to build all models in the example/ directory
# as views (the default). These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
  my_new_package:
    # Applies to all files under models/example/
    example:
      materialized: view

View File

@@ -1 +0,0 @@
select 1 as id

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_0
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_1
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_2
version: 2

View File

@@ -1,38 +0,0 @@
# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'my_new_package'
version: 1.0.0
config-version: 2
# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'default'
# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_modules"
# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
# In this example config, we tell dbt to build all models in the example/ directory
# as views (the default). These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
  my_new_package:
    # Applies to all files under models/example/
    example:
      materialized: view

View File

@@ -1 +0,0 @@
select 1 as id

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_0
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_1
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_2
version: 2

View File

@@ -1,5 +0,0 @@
# all files here are generated results
*
# except this one
!.gitignore

View File

@@ -1,2 +0,0 @@
target/
projects/*/logs

View File

@@ -1,307 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "clap"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "runner"
version = "0.1.0"
dependencies = [
"itertools",
"serde",
"serde_json",
"structopt",
"thiserror",
]
[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "serde"
version = "1.0.127"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.127"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "structopt"
version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71"
dependencies = [
"clap",
"lazy_static",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-segmentation"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,11 +0,0 @@
[package]
name = "runner"
version = "0.1.0"
edition = "2018"
[dependencies]
itertools = "0.10.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
structopt = "0.3"
thiserror = "1.0.26"

View File

@@ -1,269 +0,0 @@
use crate::exceptions::{CalculateError, IOError};
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::fs;
use std::fs::DirEntry;
use std::path::{Path, PathBuf};
// This type exactly matches the type of array elements
// from hyperfine's output. Deriving `Serialize` and `Deserialize`
// gives us read and write capabilities via serde_json.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Measurement {
pub command: String,
pub mean: f64,
pub stddev: f64,
pub median: f64,
pub user: f64,
pub system: f64,
pub min: f64,
pub max: f64,
pub times: Vec<f64>,
}
// This type exactly matches the type of hyperfine's output.
// Deriving `Serialize` and `Deserialize` gives us read and
// write capabilities via serde_json.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Measurements {
pub results: Vec<Measurement>,
}
// Output data from a comparison between runs on the baseline
// and dev branches.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Data {
pub threshold: f64,
pub difference: f64,
pub baseline: f64,
pub dev: f64,
}
// The full output from a comparison between runs on the baseline
// and dev branches.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Calculation {
pub metric: String,
pub regression: bool,
pub data: Data,
}
// A type to describe which measurement we are working with. This
// information is parsed from the filename of hyperfine's output.
#[derive(Debug, Clone, PartialEq)]
pub struct MeasurementGroup {
pub version: String,
pub run: String,
pub measurement: Measurement,
}
// Given two measurements, return all the calculations. Calculations are
// flagged as regressions or not regressions.
fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec<Calculation> {
let median_threshold = 1.05; // 5% regression threshold
let median_difference = dev.median / baseline.median;
let stddev_threshold = 1.20; // 20% regression threshold
let stddev_difference = dev.stddev / baseline.stddev;
vec![
Calculation {
metric: ["median", metric].join("_"),
regression: median_difference > median_threshold,
data: Data {
threshold: median_threshold,
difference: median_difference,
baseline: baseline.median,
dev: dev.median,
},
},
Calculation {
metric: ["stddev", metric].join("_"),
regression: stddev_difference > stddev_threshold,
data: Data {
threshold: stddev_threshold,
difference: stddev_difference,
baseline: baseline.stddev,
dev: dev.stddev,
},
},
]
}
// Given a directory, read all files in the directory and return each
// filename with the deserialized json contents of that file.
fn measurements_from_files(
results_directory: &Path,
) -> Result<Vec<(PathBuf, Measurements)>, CalculateError> {
fs::read_dir(results_directory)
.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e))))
.or_else(|e| Err(CalculateError::CalculateIOError(e)))?
.into_iter()
.map(|entry| {
let ent: DirEntry = entry
.or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e))))
.or_else(|e| Err(CalculateError::CalculateIOError(e)))?;
Ok(ent.path())
})
.collect::<Result<Vec<PathBuf>, CalculateError>>()?
.iter()
.filter(|path| {
path.extension()
.and_then(|ext| ext.to_str())
.map_or(false, |ext| ext.ends_with("json"))
})
.map(|path| {
fs::read_to_string(path)
.or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e))))
.or_else(|e| Err(CalculateError::CalculateIOError(e)))
.and_then(|contents| {
serde_json::from_str::<Measurements>(&contents)
.or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e))))
})
.map(|m| (path.clone(), m))
})
.collect()
}
// Given a list of filename-measurement pairs, detect any regressions by grouping
// measurements together by filename.
fn calculate_regressions(
measurements: &[(&PathBuf, &Measurement)],
) -> Result<Vec<Calculation>, CalculateError> {
/*
Strategy of this function body:
1. [Measurement] -> [MeasurementGroup]
2. Sort the MeasurementGroups
3. Group the MeasurementGroups by "run"
4. Call `calculate` with the two resulting Measurements as input
*/
let mut measurement_groups: Vec<MeasurementGroup> = measurements
.iter()
.map(|(p, m)| {
p.file_name()
.ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf()))
.and_then(|name| {
name.to_str()
.ok_or_else(|| IOError::FilenameNotUnicodeErr(p.to_path_buf()))
})
.map(|name| {
let parts: Vec<&str> = name.split("_").collect();
MeasurementGroup {
version: parts[0].to_owned(),
run: parts[1..].join("_"),
measurement: (*m).clone(),
}
})
})
.collect::<Result<Vec<MeasurementGroup>, IOError>>()
.or_else(|e| Err(CalculateError::CalculateIOError(e)))?;
measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version)));
// rebind immutably to lock in the sorted order
let sorted_measurement_groups = measurement_groups;
let calculations: Vec<Calculation> = sorted_measurement_groups
.iter()
.group_by(|x| &x.run)
.into_iter()
.map(|(_, g)| {
let mut groups: Vec<&MeasurementGroup> = g.collect();
groups.sort_by(|x, y| x.version.cmp(&y.version));
match groups.len() {
2 => {
let dev = &groups[1];
let baseline = &groups[0];
if dev.version == "dev" && baseline.version == "baseline" {
Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement))
} else {
Err(CalculateError::BadBranchNameErr(
baseline.version.clone(),
dev.version.clone(),
))
}
}
i => {
let gs: Vec<MeasurementGroup> = groups.into_iter().map(|x| x.clone()).collect();
Err(CalculateError::BadGroupSizeErr(i, gs))
}
}
})
.collect::<Result<Vec<Vec<Calculation>>, CalculateError>>()?
.concat();
Ok(calculations)
}
// Top-level function. Given a path for the result directory, call the above
// functions to compare and collect calculations. Calculations include both
// metrics that fall within the threshold and regressions.
pub fn regressions(results_directory: &PathBuf) -> Result<Vec<Calculation>, CalculateError> {
measurements_from_files(Path::new(&results_directory)).and_then(|v| {
// exit early with an Err if there are no results to process
if v.is_empty() {
Err(CalculateError::NoResultsErr(results_directory.clone()))
// we expect two runs for each project-metric pairing: one for each branch, baseline
// and dev. An odd result count is unexpected.
} else if v.len() % 2 == 1 {
Err(CalculateError::OddResultsCountErr(
v.len(),
results_directory.clone(),
))
} else {
// otherwise, we can do our comparisons
let measurements = v
.iter()
// the way we're running these, the files will each contain exactly one measurement, hence `results[0]`
.map(|(p, ms)| (p, &ms.results[0]))
.collect::<Vec<(&PathBuf, &Measurement)>>();
calculate_regressions(&measurements[..])
}
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn detects_5_percent_regression() {
let dev = Measurement {
command: "some command".to_owned(),
mean: 1.06,
stddev: 1.06,
median: 1.06,
user: 1.06,
system: 1.06,
min: 1.06,
max: 1.06,
times: vec![],
};
let baseline = Measurement {
command: "some command".to_owned(),
mean: 1.00,
stddev: 1.00,
median: 1.00,
user: 1.00,
system: 1.00,
min: 1.00,
max: 1.00,
times: vec![],
};
let calculations = calculate("test_metric", &dev, &baseline);
let regressions: Vec<&Calculation> =
calculations.iter().filter(|calc| calc.regression).collect();
// expect one regression for median
println!("{:#?}", regressions);
assert_eq!(regressions.len(), 1);
assert_eq!(regressions[0].metric, "median_test_metric");
}
}
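To make the hyperfine contract above concrete, here is a minimal round-trip through `Measurements`, using a hand-written payload in the shape hyperfine's `--export-json` produces (the numbers are made up):

```rust
// Assumes the serde/serde_json dependencies already declared in Cargo.toml.
fn demo_parse_hyperfine_output() {
    let raw = r#"{
        "results": [{
            "command": "dbt parse --no-version-check",
            "mean": 1.02, "stddev": 0.03, "median": 1.01,
            "user": 0.80, "system": 0.20, "min": 0.97, "max": 1.08,
            "times": [0.97, 1.01, 1.08]
        }]
    }"#;
    let parsed: Measurements =
        serde_json::from_str(raw).expect("payload matches the Measurements shape");
    assert_eq!(parsed.results[0].median, 1.01);
}
```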

View File

@@ -1,155 +0,0 @@
use crate::calculate::*;
use std::io;
#[cfg(test)]
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;
// Custom IO Error messages for the IO errors we encounter.
// New constructors should be added to wrap any new IO errors.
// The desired output of these errors is tested below.
#[derive(Debug, Error)]
pub enum IOError {
#[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
ReadErr(PathBuf, Option<io::Error>),
#[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
MissingFilenameErr(PathBuf),
#[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
FilenameNotUnicodeErr(PathBuf),
#[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
BadFileContentsErr(PathBuf, Option<io::Error>),
#[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
CommandErr(Option<io::Error>),
}
// Custom Error messages for the error states we could encounter
// during calculation, and are not prevented at compile time. New
// constructors should be added for any new error situations that
// come up. The desired output of these errors is tested below.
#[derive(Debug, Error)]
pub enum CalculateError {
#[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
BadJSONErr(PathBuf, Option<serde_json::Error>),
#[error("{}", .0)]
CalculateIOError(IOError),
#[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())]
NoResultsErr(PathBuf),
#[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())]
OddResultsCountErr(usize, PathBuf),
#[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::<Vec<(&str, &str)>>())]
BadGroupSizeErr(usize, Vec<MeasurementGroup>),
#[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)]
BadBranchNameErr(String, String),
}
// Tests for exceptions
#[cfg(test)]
mod tests {
use super::*;
// Tests the output of IO error messages. There should be at least one per enum constructor.
#[test]
fn test_io_error_messages() {
let pairs = vec![
(
IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None),
r#"ReadErr: The file cannot be read.
Filepath: dummy/path/file.json
Originating Exception: None"#,
),
(
IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()),
r#"MissingFilenameErr: The path provided does not specify a file.
Filepath: dummy/path/no_file/"#,
),
(
IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()),
r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.
Filepath: dummy/path/no_file/"#,
),
(
IOError::BadFileContentsErr(
Path::new("dummy/path/filenotexist.json").to_path_buf(),
None,
),
r#"BadFileContentsErr: Check that the file exists and is readable.
Filepath: dummy/path/filenotexist.json
Originating Exception: None"#,
),
(
IOError::CommandErr(None),
r#"CommandErr: System command failed to run.
Originating Exception: None"#,
),
];
for (err, msg) in pairs {
assert_eq!(format!("{}", err), msg)
}
}
// Tests the output of calculate error messages. There should be at least one per enum constructor.
#[test]
fn test_calculate_error_messages() {
let pairs = vec![
(
CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None),
r#"BadJSONErr: JSON in file cannot be deserialized as expected.
Filepath: dummy/path/file.json
Originating Exception: None"#,
),
(
CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None),
r#"BadJSONErr: JSON in file cannot be deserialized as expected.
Filepath: dummy/path/file.json
Originating Exception: None"#,
),
(
CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()),
r#"NoResultsErr: The results directory has no json files in it.
Filepath: dummy/path/no_file/"#,
),
(
CalculateError::OddResultsCountErr(
3,
Path::new("dummy/path/no_file/").to_path_buf(),
),
r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.
File Count: 3
Filepath: dummy/path/no_file/"#,
),
(
CalculateError::BadGroupSizeErr(
1,
vec![MeasurementGroup {
version: "dev".to_owned(),
run: "some command".to_owned(),
measurement: Measurement {
command: "some command".to_owned(),
mean: 1.0,
stddev: 1.0,
median: 1.0,
user: 1.0,
system: 1.0,
min: 1.0,
max: 1.0,
times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1],
},
}],
),
r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair.
Count: 1
Group: [("dev", "some command")]"#,
),
(
CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()),
r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'.
Found: boop, noop"#,
),
];
for (err, msg) in pairs {
assert_eq!(format!("{}", err), msg)
}
}
}
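A small sketch of how the two enums compose: wrapping an `IOError` in `CalculateError::CalculateIOError` passes the inner message through unchanged, because of the `#[error("{}", .0)]` attribute above. The path here is a dummy value:

```rust
fn demo_error_wrapping() {
    let inner = IOError::MissingFilenameErr(PathBuf::from("dummy/path/no_file/"));
    let outer = CalculateError::CalculateIOError(inner);
    // prints the MissingFilenameErr message verbatim
    println!("{}", outer);
}
```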

View File

@@ -1,119 +0,0 @@
extern crate structopt;
mod calculate;
mod exceptions;
mod measure;
use crate::calculate::Calculation;
use crate::exceptions::CalculateError;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use structopt::StructOpt;
// This type defines the commandline interface and is generated
// by `derive(StructOpt)`
#[derive(Clone, Debug, StructOpt)]
#[structopt(name = "performance", about = "performance regression testing runner")]
enum Opt {
#[structopt(name = "measure")]
Measure {
#[structopt(parse(from_os_str))]
#[structopt(short)]
projects_dir: PathBuf,
#[structopt(short)]
branch_name: String,
},
#[structopt(name = "calculate")]
Calculate {
#[structopt(parse(from_os_str))]
#[structopt(short)]
results_dir: PathBuf,
},
}
// enables proper usage of exit() in main.
// https://doc.rust-lang.org/std/process/fn.exit.html#examples
//
// This is where all the printing should happen. Exiting happens
// in main, and module functions should only return values.
fn run_app() -> Result<i32, CalculateError> {
// match what the user inputs from the cli
match Opt::from_args() {
// measure subcommand
Opt::Measure {
projects_dir,
branch_name,
} => {
// if there are any nonzero exit codes from the hyperfine runs,
// return the first one. otherwise return zero.
measure::measure(&projects_dir, &branch_name)
.or_else(|e| Err(CalculateError::CalculateIOError(e)))?
.iter()
.map(|status| status.code())
.flatten()
.filter(|code| *code != 0)
.collect::<Vec<i32>>()
.get(0)
.map_or(Ok(0), |x| {
println!("Main: a child process exited with a nonzero status code.");
Ok(*x)
})
}
// calculate subcommand
Opt::Calculate { results_dir } => {
// get all the calculations or gracefully show the user an exception
let calculations = calculate::regressions(&results_dir)?;
// print all calculations to stdout so they can be easily debugged
// via CI.
println!(":: All Calculations ::\n");
for c in &calculations {
println!("{:#?}\n", c);
}
// indented json string representation of the calculations array
let json_calcs = serde_json::to_string_pretty(&calculations)
.expect("Main: Failed to serialize calculations to json");
// create the empty destination file, and write the json string
let outfile = &mut results_dir.into_os_string();
outfile.push("/final_calculations.json");
let mut f = File::create(outfile).expect("Main: Unable to create file");
f.write_all(json_calcs.as_bytes())
.expect("Main: Unable to write data");
// filter for regressions
let regressions: Vec<&Calculation> =
calculations.iter().filter(|c| c.regression).collect();
// return a non-zero exit code if there are regressions
match regressions[..] {
[] => {
println!("congrats! no regressions :)");
Ok(0)
}
_ => {
// print all calculations to stdout so they can be easily
// debugged via CI.
println!(":: Regressions Found ::\n");
for r in regressions {
println!("{:#?}\n", r);
}
Ok(1)
}
}
}
}
}
fn main() {
std::process::exit(match run_app() {
Ok(code) => code,
Err(err) => {
eprintln!("{}", err);
1
}
});
}
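For reference, a sketch of how the derived parser maps the flags used in the GitHub workflow onto `Opt`, using structopt's `from_iter` to supply an explicit argument list (the values are examples):

```rust
fn demo_cli_parsing() {
    // `-p` and `-b` come from the `#[structopt(short)]` attributes
    // on `projects_dir` and `branch_name`.
    let opt = Opt::from_iter(vec![
        "runner", "measure", "-b", "dev", "-p", "performance/projects/",
    ]);
    // parses to Opt::Measure { projects_dir: "performance/projects/".into(),
    //                          branch_name: "dev".into() }
    println!("{:?}", opt);
}
```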

View File

@@ -1,89 +0,0 @@
use crate::exceptions::IOError;
use std::fs;
use std::path::PathBuf;
use std::process::{Command, ExitStatus};
// `Metric` defines a dbt command that we want to measure on both the
// baseline and dev branches.
#[derive(Debug, Clone)]
struct Metric<'a> {
name: &'a str,
prepare: &'a str,
cmd: &'a str,
}
impl Metric<'_> {
// Returns the proper filename for the hyperfine output for this metric.
fn outfile(&self, project: &str, branch: &str) -> String {
[branch, "_", self.name, "_", project, ".json"].join("")
}
}
// Calls hyperfine via system command, and returns all the exit codes for each hyperfine run.
pub fn measure<'a>(
projects_directory: &PathBuf,
dbt_branch: &str,
) -> Result<Vec<ExitStatus>, IOError> {
/*
Strategy of this function body:
1. Read all directory names in `projects_directory`
2. Pair `n` projects with `m` metrics for a total of n*m pairs
3. Run hyperfine on each project-metric pair
*/
// To add a new metric to the test suite, simply define it in this list:
// TODO: This could be read from a config file in a future version.
let metrics: Vec<Metric> = vec![Metric {
name: "parse",
prepare: "rm -rf target/",
cmd: "dbt parse --no-version-check",
}];
fs::read_dir(projects_directory)
.or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))?
.map(|entry| {
let path = entry
.or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))?
.path();
let project_name: String = path
.file_name()
.ok_or_else(|| IOError::MissingFilenameErr(path.clone().to_path_buf()))
.and_then(|x| {
x.to_str()
.ok_or_else(|| IOError::FilenameNotUnicodeErr(path.clone().to_path_buf()))
})?
.to_owned();
// each project-metric pair we will run
let pairs = metrics
.iter()
.map(|metric| (path.clone(), project_name.clone(), metric))
.collect::<Vec<(PathBuf, String, &Metric<'a>)>>();
Ok(pairs)
})
.collect::<Result<Vec<Vec<(PathBuf, String, &Metric<'a>)>>, IOError>>()?
.concat()
.iter()
// run hyperfine on each pairing
.map(|(path, project_name, metric)| {
Command::new("hyperfine")
.current_dir(path)
// warms filesystem caches by running the command first without counting it.
// alternatively we could clear them before each run
.arg("--warmup")
.arg("1")
.arg("--prepare")
.arg(metric.prepare)
.arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join(""))
.arg("--export-json")
.arg(["../../results/", &metric.outfile(project_name, dbt_branch)].join(""))
// this prevents hyperfine from capturing dbt's output.
// Noisy, but good for debugging when tests fail.
.arg("--show-output")
.status() // use spawn() here instead for more information
.or_else(|e| Err(IOError::CommandErr(Some(e))))
})
.collect()
}
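The naming convention from `Metric::outfile` is what ties this module to `calculate.rs`: `calculate_regressions` later splits the filename on '_' to recover the branch ("version", here "dev") and the run (the remainder). A quick sketch:

```rust
fn demo_outfile_naming() {
    let metric = Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    };
    // branch first, then metric name, then project
    assert_eq!(metric.outfile("project1", "dev"), "dev_parse_project1.json");
}
```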