Mirror of https://github.com/dbt-labs/dbt-core, synced 2025-12-19 09:21:27 +00:00

Compare commits: add-except...revert-360 (1 commit)

Commit: cdb78d0270
.github/workflows/performance.yml (vendored, 181 lines changed)
@@ -1,181 +0,0 @@
name: Performance Regression Testing

# Schedule triggers
on:
  # TODO this is just while developing
  pull_request:
    branches:
      - 'develop'
      - 'performance-regression-testing'
  schedule:
    # runs twice a day at 10:05am and 10:05pm
    - cron: '5 10,22 * * *'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:

  # checks fmt of runner code
  # purposefully not a dependency of any other job
  # will block merging, but not prevent developing
  fmt:
    name: Cargo fmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - run: rustup component add rustfmt
      - uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path performance/runner/Cargo.toml --all -- --check

  # runs any tests associated with the runner
  # these tests make sure the runner logic is correct
  test-runner:
    name: Test Runner
    runs-on: ubuntu-latest
    env:
      # turns warnings into errors
      RUSTFLAGS: "-D warnings"
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: test
          args: --manifest-path performance/runner/Cargo.toml

  # build an optimized binary to be used as the runner in later steps
  build-runner:
    needs: [test-runner]
    name: Build Runner
    runs-on: ubuntu-latest
    env:
      RUSTFLAGS: "-D warnings"
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: actions-rs/cargo@v1
        with:
          command: build
          args: --release --manifest-path performance/runner/Cargo.toml
      - uses: actions/upload-artifact@v2
        with:
          name: runner
          path: performance/runner/target/release/runner

  # run the performance measurements on the current or default branch
  measure-dev:
    needs: [build-runner]
    name: Measure Dev Branch
    runs-on: ubuntu-latest
    steps:
      - name: checkout dev
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: '3.8'
      - name: install dbt
        run: pip install -r dev-requirements.txt -r editable-requirements.txt
      - name: install hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run
        run: ./runner measure -b dev -p ${{ github.workspace }}/performance/projects/
      - uses: actions/upload-artifact@v2
        with:
          name: dev-results
          path: performance/results/

  # run the performance measurements on the release branch which we use
  # as a performance baseline. This part takes by far the longest, so
  # we do everything we can first so the job fails fast.
  # -----
  # we need to checkout dbt twice in this job: once for the baseline dbt
  # version, and once to get the latest regression testing projects,
  # metrics, and runner code from the develop or current branch so that
  # the calculations match for both versions of dbt we are comparing.
  measure-baseline:
    needs: [build-runner]
    name: Measure Baseline Branch
    runs-on: ubuntu-latest
    steps:
      - name: checkout latest
        uses: actions/checkout@v2
        with:
          ref: '0.20.latest'
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: '3.8'
      - name: move repo up a level
        run: mkdir ${{ github.workspace }}/../baseline/ && cp -r ${{ github.workspace }} ${{ github.workspace }}/../baseline
      - name: "[debug] ls new dbt location"
        run: ls ${{ github.workspace }}/../baseline/dbt/
      # installation creates egg-links so we have to preserve source
      - name: install dbt from new location
        run: cd ${{ github.workspace }}/../baseline/dbt/ && pip install -r dev-requirements.txt -r editable-requirements.txt
      # checkout the current branch to get all the target projects
      # this deletes the old checked out code which is why we had to copy before
      - name: checkout dev
        uses: actions/checkout@v2
      - name: install hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run runner
        run: ./runner measure -b baseline -p ${{ github.workspace }}/performance/projects/
      - uses: actions/upload-artifact@v2
        with:
          name: baseline-results
          path: performance/results/

  # detect regressions on the output generated from measuring
  # the two branches. Exits with non-zero code if a regression is detected.
  calculate-regressions:
    needs: [measure-dev, measure-baseline]
    name: Compare Results
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: dev-results
      - uses: actions/download-artifact@v2
        with:
          name: baseline-results
      - name: "[debug] ls result files"
        run: ls
      - uses: actions/download-artifact@v2
        with:
          name: runner
      - name: change permissions
        run: chmod +x ./runner
      - name: run calculation
        run: ./runner calculate -r ./
      # always attempt to upload the results even if there were regressions found
      - uses: actions/upload-artifact@v2
        if: ${{ always() }}
        with:
          name: final-calculations
          path: ./final_calculations.json
@@ -22,7 +22,6 @@
- Fix for RPC requests that raise a RecursionError when serializing Undefined values as JSON ([#3464](https://github.com/dbt-labs/dbt/issues/3464), [#3687](https://github.com/dbt-labs/dbt/pull/3687))

### Under the hood
- Add performance regression testing [#3602](https://github.com/dbt-labs/dbt/pull/3602)
- Improve default view and table materialization performance by checking relational cache before attempting to drop temp relations ([#3112](https://github.com/fishtown-analytics/dbt/issues/3112), [#3468](https://github.com/fishtown-analytics/dbt/pull/3468))
- Add optional `sslcert`, `sslkey`, and `sslrootcert` profile arguments to the Postgres connector. ([#3472](https://github.com/fishtown-analytics/dbt/pull/3472), [#3473](https://github.com/fishtown-analytics/dbt/pull/3473))
- Move the example project used by `dbt init` into `dbt` repository, to avoid cloning an external repo ([#3005](https://github.com/fishtown-analytics/dbt/pull/3005), [#3474](https://github.com/fishtown-analytics/dbt/pull/3474), [#3536](https://github.com/fishtown-analytics/dbt/pull/3536))
@@ -1,18 +0,0 @@
# Performance Regression Testing

This directory includes dbt project setups to test on and a test runner written in Rust which runs specific dbt commands on each of the projects. Orchestration is done via the GitHub Action workflow in `/.github/workflows/performance.yml`. The workflow is scheduled to run every night, but it can also be triggered manually.

The GitHub workflow hardcodes our baseline branch for performance metrics as `0.20.latest`. As future versions become faster, this branch will be updated to hold us to those new standards.
## Adding a new dbt project

Just make a new directory under `performance/projects/`. It will automatically be picked up by the tests.
## Adding a new dbt command

In `runner/src/measure.rs::measure`, add a metric to the `metrics` Vec, as sketched below. The GitHub Action will handle recompilation if you don't have the Rust toolchain installed.
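A minimal sketch of that edit, based on the `Metric` struct and the existing `parse` entry in `measure.rs` shown further down in this diff; the `compile` entry here is only a hypothetical example of a second metric, not something the suite currently measures:

```rust
// runner/src/measure.rs (inside `measure`): each Metric pairs a dbt command to
// time with a `prepare` command that hyperfine runs before every timed invocation.
let metrics: Vec<Metric> = vec![
    // existing entry
    Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    },
    // hypothetical new entry: also time a full compile
    Metric {
        name: "compile",
        prepare: "rm -rf target/",
        cmd: "dbt compile --no-version-check",
    },
];
```

Because the metric name becomes part of the hyperfine output filename (`<branch>_<name>_<project>.json`) and the calculate step reads every JSON file in the results directory, a new entry needs no other wiring.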
## Future work

- add more projects to test different configurations that have been known bottlenecks
- add more dbt commands to measure
- possibly using the uploaded json artifacts to store these results so they can be graphed over time
- reading new metrics from a file so no one has to edit rust source to add them to the suite (one possible shape is sketched after this list)
- instead of building the rust every time, we could publish and pull down the latest version.
- instead of manually setting the baseline version of dbt to test, pull down the latest stable version as the baseline.
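On the "reading new metrics from a file" bullet, a purely illustrative sketch follows. It assumes an owned, deserializable variant of the `Metric` struct and a JSON file format invented for this example; the `MetricConfig` name and `read_metrics` function do not exist in the runner today, though the `serde` (with `derive`) and `serde_json` dependencies it uses are already declared in the runner's Cargo.toml.

```rust
// Hypothetical sketch only: the runner currently hardcodes its metrics in measure.rs.
use serde::Deserialize;
use std::fs;
use std::path::Path;

#[derive(Debug, Deserialize)]
struct MetricConfig {
    name: String,
    prepare: String,
    cmd: String,
}

// Reads a JSON array like:
//   [{"name": "parse", "prepare": "rm -rf target/", "cmd": "dbt parse --no-version-check"}]
fn read_metrics(path: &Path) -> Result<Vec<MetricConfig>, Box<dyn std::error::Error>> {
    let contents = fs::read_to_string(path)?;
    let metrics: Vec<MetricConfig> = serde_json::from_str(&contents)?;
    Ok(metrics)
}
```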
@@ -1 +0,0 @@
id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b
@@ -1,12 +0,0 @@
default:
  target: dev
  outputs:
    dev:
      type: postgres
      host: localhost
      user: dummy
      password: dummy_password
      port: 5432
      dbname: dummy
      schema: dummy
      threads: 4
@@ -1,38 +0,0 @@

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'my_new_package'
version: 1.0.0
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'default'

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
    - "target"
    - "dbt_modules"

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!

# In this example config, we tell dbt to build all models in the example/ directory
# as views (the default). These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
    my_new_package:
        # Applies to all files under models/example/
        example:
            materialized: view
@@ -1 +0,0 @@
select 1 as id
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_0
version: 2
@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_1
version: 2
@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_2
version: 2
@@ -1,38 +0,0 @@

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'my_new_package'
version: 1.0.0
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'default'

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
    - "target"
    - "dbt_modules"

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!

# In this example config, we tell dbt to build all models in the example/ directory
# as views (the default). These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.
models:
    my_new_package:
        # Applies to all files under models/example/
        example:
            materialized: view
@@ -1 +0,0 @@
select 1 as id
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_0
version: 2
@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_1
version: 2
@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
@@ -1,11 +0,0 @@
models:
- columns:
  - name: id
    tests:
    - unique
    - not_null
    - relationships:
        field: id
        to: node_0
  name: node_2
version: 2
performance/results/.gitignore (vendored, 5 lines changed)
@@ -1,5 +0,0 @@
# all files here are generated results
*

# except this one
!.gitignore
performance/runner/.gitignore (vendored, 2 lines changed)
@@ -1,2 +0,0 @@
target/
projects/*/logs
performance/runner/Cargo.lock (generated, 307 lines changed)
@@ -1,307 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
 "winapi",
]

[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
 "hermit-abi",
 "libc",
 "winapi",
]

[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"

[[package]]
name = "clap"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
 "ansi_term",
 "atty",
 "bitflags",
 "strsim",
 "textwrap",
 "unicode-width",
 "vec_map",
]

[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"

[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
 "unicode-segmentation",
]

[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
 "libc",
]

[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
 "either",
]

[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
version = "0.2.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"

[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
 "proc-macro-error-attr",
 "proc-macro2",
 "quote",
 "syn",
 "version_check",
]

[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
 "proc-macro2",
 "quote",
 "version_check",
]

[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
 "unicode-xid",
]

[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "runner"
version = "0.1.0"
dependencies = [
 "itertools",
 "serde",
 "serde_json",
 "structopt",
 "thiserror",
]

[[package]]
name = "ryu"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"

[[package]]
name = "serde"
version = "1.0.127"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.127"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "serde_json"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b10da19a12ad094b59d870ebde26a45402e5b470add4b5fd03c5048a32127"
dependencies = [
 "itoa",
 "ryu",
 "serde",
]

[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"

[[package]]
name = "structopt"
version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69b041cdcb67226aca307e6e7be44c8806423d83e018bd662360a93dabce4d71"
dependencies = [
 "clap",
 "lazy_static",
 "structopt-derive",
]

[[package]]
name = "structopt-derive"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7813934aecf5f51a54775e00068c237de98489463968231a51746bbbc03f9c10"
dependencies = [
 "heck",
 "proc-macro-error",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "syn"
version = "1.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-xid",
]

[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
 "unicode-width",
]

[[package]]
name = "thiserror"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93119e4feac1cbe6c798c34d3a53ea0026b0b1de6a120deef895137c0529bfe2"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "060d69a0afe7796bf42e9e2ff91f5ee691fb15c53d38b4b62a9a53eb23164745"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "unicode-segmentation"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"

[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"

[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"

[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"

[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
 "winapi-i686-pc-windows-gnu",
 "winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
@@ -1,11 +0,0 @@
[package]
name = "runner"
version = "0.1.0"
edition = "2018"

[dependencies]
itertools = "0.10.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
structopt = "0.3"
thiserror = "1.0.26"
@@ -1,269 +0,0 @@
use crate::exceptions::{CalculateError, IOError};
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::fs;
use std::fs::DirEntry;
use std::path::{Path, PathBuf};

// This type exactly matches the type of array elements
// from hyperfine's output. Deriving `Serialize` and `Deserialize`
// gives us read and write capabilities via json_serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Measurement {
    pub command: String,
    pub mean: f64,
    pub stddev: f64,
    pub median: f64,
    pub user: f64,
    pub system: f64,
    pub min: f64,
    pub max: f64,
    pub times: Vec<f64>,
}

// This type exactly matches the type of hyperfine's output.
// Deriving `Serialize` and `Deserialize` gives us read and
// write capabilities via json_serde.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Measurements {
    pub results: Vec<Measurement>,
}

// Output data from a comparison between runs on the baseline
// and dev branches.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Data {
    pub threshold: f64,
    pub difference: f64,
    pub baseline: f64,
    pub dev: f64,
}

// The full output from a comparison between runs on the baseline
// and dev branches.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Calculation {
    pub metric: String,
    pub regression: bool,
    pub data: Data,
}

// A type to describe which measurement we are working with. This
// information is parsed from the filename of hyperfine's output.
#[derive(Debug, Clone, PartialEq)]
pub struct MeasurementGroup {
    pub version: String,
    pub run: String,
    pub measurement: Measurement,
}

// Given two measurements, return all the calculations. Calculations are
// flagged as regressions or not regressions.
fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec<Calculation> {
    let median_threshold = 1.05; // 5% regression threshold
    let median_difference = dev.median / baseline.median;

    let stddev_threshold = 1.20; // 20% regression threshold
    let stddev_difference = dev.stddev / baseline.stddev;

    vec![
        Calculation {
            metric: ["median", metric].join("_"),
            regression: median_difference > median_threshold,
            data: Data {
                threshold: median_threshold,
                difference: median_difference,
                baseline: baseline.median,
                dev: dev.median,
            },
        },
        Calculation {
            metric: ["stddev", metric].join("_"),
            regression: stddev_difference > stddev_threshold,
            data: Data {
                threshold: stddev_threshold,
                difference: stddev_difference,
                baseline: baseline.stddev,
                dev: dev.stddev,
            },
        },
    ]
}

// Given a directory, read all files in the directory and return each
// filename with the deserialized json contents of that file.
fn measurements_from_files(
    results_directory: &Path,
) -> Result<Vec<(PathBuf, Measurements)>, CalculateError> {
    fs::read_dir(results_directory)
        .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e))))
        .or_else(|e| Err(CalculateError::CalculateIOError(e)))?
        .into_iter()
        .map(|entry| {
            let ent: DirEntry = entry
                .or_else(|e| Err(IOError::ReadErr(results_directory.to_path_buf(), Some(e))))
                .or_else(|e| Err(CalculateError::CalculateIOError(e)))?;

            Ok(ent.path())
        })
        .collect::<Result<Vec<PathBuf>, CalculateError>>()?
        .iter()
        .filter(|path| {
            path.extension()
                .and_then(|ext| ext.to_str())
                .map_or(false, |ext| ext.ends_with("json"))
        })
        .map(|path| {
            fs::read_to_string(path)
                .or_else(|e| Err(IOError::BadFileContentsErr(path.clone(), Some(e))))
                .or_else(|e| Err(CalculateError::CalculateIOError(e)))
                .and_then(|contents| {
                    serde_json::from_str::<Measurements>(&contents)
                        .or_else(|e| Err(CalculateError::BadJSONErr(path.clone(), Some(e))))
                })
                .map(|m| (path.clone(), m))
        })
        .collect()
}

// Given a list of filename-measurement pairs, detect any regressions by grouping
// measurements together by filename.
fn calculate_regressions(
    measurements: &[(&PathBuf, &Measurement)],
) -> Result<Vec<Calculation>, CalculateError> {
    /*
        Strategy of this function body:
        1. [Measurement] -> [MeasurementGroup]
        2. Sort the MeasurementGroups
        3. Group the MeasurementGroups by "run"
        4. Call `calculate` with the two resulting Measurements as input
    */

    let mut measurement_groups: Vec<MeasurementGroup> = measurements
        .iter()
        .map(|(p, m)| {
            p.file_name()
                .ok_or_else(|| IOError::MissingFilenameErr(p.to_path_buf()))
                .and_then(|name| {
                    name.to_str()
                        .ok_or_else(|| IOError::FilenameNotUnicodeErr(p.to_path_buf()))
                })
                .map(|name| {
                    let parts: Vec<&str> = name.split("_").collect();
                    MeasurementGroup {
                        version: parts[0].to_owned(),
                        run: parts[1..].join("_"),
                        measurement: (*m).clone(),
                    }
                })
        })
        .collect::<Result<Vec<MeasurementGroup>, IOError>>()
        .or_else(|e| Err(CalculateError::CalculateIOError(e)))?;

    measurement_groups.sort_by(|x, y| (&x.run, &x.version).cmp(&(&y.run, &y.version)));

    // locking up mutation
    let sorted_measurement_groups = measurement_groups;

    let calculations: Vec<Calculation> = sorted_measurement_groups
        .iter()
        .group_by(|x| &x.run)
        .into_iter()
        .map(|(_, g)| {
            let mut groups: Vec<&MeasurementGroup> = g.collect();
            groups.sort_by(|x, y| x.version.cmp(&y.version));

            match groups.len() {
                2 => {
                    let dev = &groups[1];
                    let baseline = &groups[0];

                    if dev.version == "dev" && baseline.version == "baseline" {
                        Ok(calculate(&dev.run, &dev.measurement, &baseline.measurement))
                    } else {
                        Err(CalculateError::BadBranchNameErr(
                            baseline.version.clone(),
                            dev.version.clone(),
                        ))
                    }
                }
                i => {
                    let gs: Vec<MeasurementGroup> = groups.into_iter().map(|x| x.clone()).collect();
                    Err(CalculateError::BadGroupSizeErr(i, gs))
                }
            }
        })
        .collect::<Result<Vec<Vec<Calculation>>, CalculateError>>()?
        .concat();

    Ok(calculations)
}

// Top-level function. Given a path for the result directory, call the above
// functions to compare and collect calculations. Calculations include both
// metrics that fall within the threshold and regressions.
pub fn regressions(results_directory: &PathBuf) -> Result<Vec<Calculation>, CalculateError> {
    measurements_from_files(Path::new(&results_directory)).and_then(|v| {
        // exit early with an Err if there are no results to process
        if v.len() <= 0 {
            Err(CalculateError::NoResultsErr(results_directory.clone()))
        // we expect two runs for each project-metric pairing: one for each branch, baseline
        // and dev. An odd result count is unexpected.
        } else if v.len() % 2 == 1 {
            Err(CalculateError::OddResultsCountErr(
                v.len(),
                results_directory.clone(),
            ))
        } else {
            // otherwise, we can do our comparisons
            let measurements = v
                .iter()
                // the way we're running these, the files will each contain exactly one measurement, hence `results[0]`
                .map(|(p, ms)| (p, &ms.results[0]))
                .collect::<Vec<(&PathBuf, &Measurement)>>();

            calculate_regressions(&measurements[..])
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_5_percent_regression() {
        let dev = Measurement {
            command: "some command".to_owned(),
            mean: 1.06,
            stddev: 1.06,
            median: 1.06,
            user: 1.06,
            system: 1.06,
            min: 1.06,
            max: 1.06,
            times: vec![],
        };

        let baseline = Measurement {
            command: "some command".to_owned(),
            mean: 1.00,
            stddev: 1.00,
            median: 1.00,
            user: 1.00,
            system: 1.00,
            min: 1.00,
            max: 1.00,
            times: vec![],
        };

        let calculations = calculate("test_metric", &dev, &baseline);
        let regressions: Vec<&Calculation> =
            calculations.iter().filter(|calc| calc.regression).collect();

        // expect one regression for median
        println!("{:#?}", regressions);
        assert_eq!(regressions.len(), 1);
        assert_eq!(regressions[0].metric, "median_test_metric");
    }
}
@@ -1,155 +0,0 @@
use crate::calculate::*;
use std::io;
#[cfg(test)]
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;

// Custom IO Error messages for the IO errors we encounter.
// New constructors should be added to wrap any new IO errors.
// The desired output of these errors is tested below.
#[derive(Debug, Error)]
pub enum IOError {
    #[error("ReadErr: The file cannot be read.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
    ReadErr(PathBuf, Option<io::Error>),
    #[error("MissingFilenameErr: The path provided does not specify a file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
    MissingFilenameErr(PathBuf),
    #[error("FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.\nFilepath: {}", .0.to_string_lossy().into_owned())]
    FilenameNotUnicodeErr(PathBuf),
    #[error("BadFileContentsErr: Check that the file exists and is readable.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
    BadFileContentsErr(PathBuf, Option<io::Error>),
    #[error("CommandErr: System command failed to run.\nOriginating Exception: {}", .0.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
    CommandErr(Option<io::Error>),
}

// Custom Error messages for the error states we could encounter
// during calculation, and are not prevented at compile time. New
// constructors should be added for any new error situations that
// come up. The desired output of these errors is tested below.
#[derive(Debug, Error)]
pub enum CalculateError {
    #[error("BadJSONErr: JSON in file cannot be deserialized as expected.\nFilepath: {}\nOriginating Exception: {}", .0.to_string_lossy().into_owned(), .1.as_ref().map_or("None".to_owned(), |e| format!("{}", e)))]
    BadJSONErr(PathBuf, Option<serde_json::Error>),
    #[error("{}", .0)]
    CalculateIOError(IOError),
    #[error("NoResultsErr: The results directory has no json files in it.\nFilepath: {}", .0.to_string_lossy().into_owned())]
    NoResultsErr(PathBuf),
    #[error("OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.\nFile Count: {}\nFilepath: {}", .0, .1.to_string_lossy().into_owned())]
    OddResultsCountErr(usize, PathBuf),
    #[error("BadGroupSizeErr: Expected two results per group, one for each branch-project pair.\nCount: {}\nGroup: {:?}", .0, .1.into_iter().map(|group| (&group.version[..], &group.run[..])).collect::<Vec<(&str, &str)>>())]
    BadGroupSizeErr(usize, Vec<MeasurementGroup>),
    #[error("BadBranchNameErr: Branch names must be 'baseline' and 'dev'.\nFound: {}, {}", .0, .1)]
    BadBranchNameErr(String, String),
}

// Tests for exceptions
#[cfg(test)]
mod tests {
    use super::*;

    // Tests the output of io error messages. There should be at least one per enum constructor.
    #[test]
    fn test_io_error_messages() {
        let pairs = vec![
            (
                IOError::ReadErr(Path::new("dummy/path/file.json").to_path_buf(), None),
                r#"ReadErr: The file cannot be read.
Filepath: dummy/path/file.json
Originating Exception: None"#,
            ),
            (
                IOError::MissingFilenameErr(Path::new("dummy/path/no_file/").to_path_buf()),
                r#"MissingFilenameErr: The path provided does not specify a file.
Filepath: dummy/path/no_file/"#,
            ),
            (
                IOError::FilenameNotUnicodeErr(Path::new("dummy/path/no_file/").to_path_buf()),
                r#"FilenameNotUnicodeErr: The filename is not expressible in unicode. Consider renaming the file.
Filepath: dummy/path/no_file/"#,
            ),
            (
                IOError::BadFileContentsErr(
                    Path::new("dummy/path/filenotexist.json").to_path_buf(),
                    None,
                ),
                r#"BadFileContentsErr: Check that the file exists and is readable.
Filepath: dummy/path/filenotexist.json
Originating Exception: None"#,
            ),
            (
                IOError::CommandErr(None),
                r#"CommandErr: System command failed to run.
Originating Exception: None"#,
            ),
        ];

        for (err, msg) in pairs {
            assert_eq!(format!("{}", err), msg)
        }
    }

    // Tests the output of calculate error messages. There should be at least one per enum constructor.
    #[test]
    fn test_calculate_error_messages() {
        let pairs = vec![
            (
                CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None),
                r#"BadJSONErr: JSON in file cannot be deserialized as expected.
Filepath: dummy/path/file.json
Originating Exception: None"#,
            ),
            (
                CalculateError::BadJSONErr(Path::new("dummy/path/file.json").to_path_buf(), None),
                r#"BadJSONErr: JSON in file cannot be deserialized as expected.
Filepath: dummy/path/file.json
Originating Exception: None"#,
            ),
            (
                CalculateError::NoResultsErr(Path::new("dummy/path/no_file/").to_path_buf()),
                r#"NoResultsErr: The results directory has no json files in it.
Filepath: dummy/path/no_file/"#,
            ),
            (
                CalculateError::OddResultsCountErr(
                    3,
                    Path::new("dummy/path/no_file/").to_path_buf(),
                ),
                r#"OddResultsCountErr: The results directory has an odd number of results in it. Expected an even number.
File Count: 3
Filepath: dummy/path/no_file/"#,
            ),
            (
                CalculateError::BadGroupSizeErr(
                    1,
                    vec![MeasurementGroup {
                        version: "dev".to_owned(),
                        run: "some command".to_owned(),
                        measurement: Measurement {
                            command: "some command".to_owned(),
                            mean: 1.0,
                            stddev: 1.0,
                            median: 1.0,
                            user: 1.0,
                            system: 1.0,
                            min: 1.0,
                            max: 1.0,
                            times: vec![1.0, 1.1, 0.9, 1.0, 1.1, 0.9, 1.1],
                        },
                    }],
                ),
                r#"BadGroupSizeErr: Expected two results per group, one for each branch-project pair.
Count: 1
Group: [("dev", "some command")]"#,
            ),
            (
                CalculateError::BadBranchNameErr("boop".to_owned(), "noop".to_owned()),
                r#"BadBranchNameErr: Branch names must be 'baseline' and 'dev'.
Found: boop, noop"#,
            ),
        ];

        for (err, msg) in pairs {
            assert_eq!(format!("{}", err), msg)
        }
    }
}
@@ -1,119 +0,0 @@
extern crate structopt;

mod calculate;
mod exceptions;
mod measure;

use crate::calculate::Calculation;
use crate::exceptions::CalculateError;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use structopt::StructOpt;

// This type defines the commandline interface and is generated
// by `derive(StructOpt)`
#[derive(Clone, Debug, StructOpt)]
#[structopt(name = "performance", about = "performance regression testing runner")]
enum Opt {
    #[structopt(name = "measure")]
    Measure {
        #[structopt(parse(from_os_str))]
        #[structopt(short)]
        projects_dir: PathBuf,
        #[structopt(short)]
        branch_name: String,
    },
    #[structopt(name = "calculate")]
    Calculate {
        #[structopt(parse(from_os_str))]
        #[structopt(short)]
        results_dir: PathBuf,
    },
}

// enables proper usage of exit() in main.
// https://doc.rust-lang.org/std/process/fn.exit.html#examples
//
// This is where all the printing should happen. Exiting happens
// in main, and module functions should only return values.
fn run_app() -> Result<i32, CalculateError> {
    // match what the user inputs from the cli
    match Opt::from_args() {
        // measure subcommand
        Opt::Measure {
            projects_dir,
            branch_name,
        } => {
            // if there are any nonzero exit codes from the hyperfine runs,
            // return the first one. otherwise return zero.
            measure::measure(&projects_dir, &branch_name)
                .or_else(|e| Err(CalculateError::CalculateIOError(e)))?
                .iter()
                .map(|status| status.code())
                .flatten()
                .filter(|code| *code != 0)
                .collect::<Vec<i32>>()
                .get(0)
                .map_or(Ok(0), |x| {
                    println!("Main: a child process exited with a nonzero status code.");
                    Ok(*x)
                })
        }

        // calculate subcommand
        Opt::Calculate { results_dir } => {
            // get all the calculations or gracefully show the user an exception
            let calculations = calculate::regressions(&results_dir)?;

            // print all calculations to stdout so they can be easily debugged
            // via CI.
            println!(":: All Calculations ::\n");
            for c in &calculations {
                println!("{:#?}\n", c);
            }

            // indented json string representation of the calculations array
            let json_calcs = serde_json::to_string_pretty(&calculations)
                .expect("Main: Failed to serialize calculations to json");

            // create the empty destination file, and write the json string
            let outfile = &mut results_dir.into_os_string();
            outfile.push("/final_calculations.json");
            let mut f = File::create(outfile).expect("Main: Unable to create file");
            f.write_all(json_calcs.as_bytes())
                .expect("Main: Unable to write data");

            // filter for regressions
            let regressions: Vec<&Calculation> =
                calculations.iter().filter(|c| c.regression).collect();

            // return a non-zero exit code if there are regressions
            match regressions[..] {
                [] => {
                    println!("congrats! no regressions :)");
                    Ok(0)
                }
                _ => {
                    // print all calculations to stdout so they can be easily
                    // debugged via CI.
                    println!(":: Regressions Found ::\n");
                    for r in regressions {
                        println!("{:#?}\n", r);
                    }
                    Ok(1)
                }
            }
        }
    }
}

fn main() {
    std::process::exit(match run_app() {
        Ok(code) => code,
        Err(err) => {
            eprintln!("{}", err);
            1
        }
    });
}
@@ -1,89 +0,0 @@
use crate::exceptions::IOError;
use std::fs;
use std::path::PathBuf;
use std::process::{Command, ExitStatus};

// `Metric` defines a dbt command that we want to measure on both the
// baseline and dev branches.
#[derive(Debug, Clone)]
struct Metric<'a> {
    name: &'a str,
    prepare: &'a str,
    cmd: &'a str,
}

impl Metric<'_> {
    // Returns the proper filename for the hyperfine output for this metric.
    fn outfile(&self, project: &str, branch: &str) -> String {
        [branch, "_", self.name, "_", project, ".json"].join("")
    }
}

// Calls hyperfine via system command, and returns all the exit codes for each hyperfine run.
pub fn measure<'a>(
    projects_directory: &PathBuf,
    dbt_branch: &str,
) -> Result<Vec<ExitStatus>, IOError> {
    /*
        Strategy of this function body:
        1. Read all directory names in `projects_directory`
        2. Pair `n` projects with `m` metrics for a total of n*m pairs
        3. Run hyperfine on each project-metric pair
    */

    // To add a new metric to the test suite, simply define it in this list:
    // TODO: This could be read from a config file in a future version.
    let metrics: Vec<Metric> = vec![Metric {
        name: "parse",
        prepare: "rm -rf target/",
        cmd: "dbt parse --no-version-check",
    }];

    fs::read_dir(projects_directory)
        .or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))?
        .map(|entry| {
            let path = entry
                .or_else(|e| Err(IOError::ReadErr(projects_directory.to_path_buf(), Some(e))))?
                .path();

            let project_name: String = path
                .file_name()
                .ok_or_else(|| IOError::MissingFilenameErr(path.clone().to_path_buf()))
                .and_then(|x| {
                    x.to_str()
                        .ok_or_else(|| IOError::FilenameNotUnicodeErr(path.clone().to_path_buf()))
                })?
                .to_owned();

            // each project-metric pair we will run
            let pairs = metrics
                .iter()
                .map(|metric| (path.clone(), project_name.clone(), metric))
                .collect::<Vec<(PathBuf, String, &Metric<'a>)>>();

            Ok(pairs)
        })
        .collect::<Result<Vec<Vec<(PathBuf, String, &Metric<'a>)>>, IOError>>()?
        .concat()
        .iter()
        // run hyperfine on each pairing
        .map(|(path, project_name, metric)| {
            Command::new("hyperfine")
                .current_dir(path)
                // warms filesystem caches by running the command first without counting it.
                // alternatively we could clear them before each run
                .arg("--warmup")
                .arg("1")
                .arg("--prepare")
                .arg(metric.prepare)
                .arg([metric.cmd, " --profiles-dir ", "../../project_config/"].join(""))
                .arg("--export-json")
                .arg(["../../results/", &metric.outfile(project_name, dbt_branch)].join(""))
                // this prevents hyperfine from capturing dbt's output.
                // Noisy, but good for debugging when tests fail.
                .arg("--show-output")
                .status() // use spawn() here instead for more information
                .or_else(|e| Err(IOError::CommandErr(Some(e))))
        })
        .collect()
}