add manually modeled 1.0.3 baseline

init sampling action
init modeling action
2025-12-20 06:51:29 +00:00 · 2022-03-16 12:10:02 -04:00 · 2022-03-16 11:50:20 -04:00 · 2022-03-16 11:49:09 -04:00
3 changed files with 488 additions and 0 deletions
--- a/.github/workflows/model_performance.yml
+++ b/.github/workflows/model_performance.yml
@@ -0,0 +1,310 @@
+# **what?**
+# This workflow models the performance characteristics of a point in time in dbt.
+# It runs specific dbt commands on committed projects multiple times to create and
+# commit information about the distribution to the current branch. For more information
+# see the readme in the performance module at /performance/README.md.
+#
+# **why?**
+# When developing new features, we can take quick performance samples and compare
+# them against the commited baseline measurements produced by this workflow to detect
+# some performance regressions at development time before they reach users.
+#
+# **when?**
+# This is only run once directly after each release. If for some reason the results of
+# a run are not satisfactory, it can also be triggered manually.
+
+name: Model Performance Characteristics
+
+on:
+  # runs after non-prereleases are published.
+  release:
+    types: [released]
+  # run manually from the actions tab
+  workflow_dispatch:
+    inputs:
+      # if we ever want to model pre-releases, we would need to handle pre-release version numbers
+      # here and in the runner. Even if we use a semver library I suspect it will be rather difficult.
+      release_id:
+        description: '(^^ always run from main) dbt version to model (must be non-prerelease in Pypi)'
+        default: 9.9.9
+        required: true
+      open_prs:
+        description: Open PRs to main and release branch? (branch name inferred from provided version) (yes/no)
+        default: 'no'
+        required: true
+
+env:
+  RUNNER_CACHE_PATH: performance/runner/target/release/runner
+
+# both jobs need to write
+permissions:
+  contents: write
+
+jobs:
+  latest-runner:
+    name: Build or Fetch Runner
+    runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-D warnings"
+    outputs:
+      cache_key: ${{ steps.variables.outputs.cache_key }}
+      release_id: ${{ steps.variables.outputs.release_id }}
+      open_prs: ${{ steps.variables.outputs.open_prs }}
+      release_branch: ${{ steps.variables.outputs.release_branch }}
+    steps:
+
+      # explicitly checkout the performance runner from main regardless of which
+      # version we are modeling.
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          ref: main
+
+      # collect all the variables that need to be used in subsequent jobs
+      - name: Set Variables
+        id: variables
+        run: |
+          # create a cache key that will be used in the next job. without this the
+          # next job would have to checkout from main an hash the files itself.
+          echo "::set-output name=cache_key::${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml')}}-${{ hashFiles('performance/runner/src/*') }}"
+
+          # this value gets used to create other values locally so it gets a local definition too.
+          local_release_id=''
+          # users are prompted to input with the correct format
+          if [[ $GITHUB_EVENT_NAME == "workflow_dispatch" ]]; then
+            echo "Workflow dispatch event detected"
+            local_release_id=${{github.event.inputs.release_id}}
+            echo "::set-output name=release_id::${{github.event.inputs.release_id}}"
+            echo "::set-output name=open_prs::${{github.event.inputs.open_prs}}"
+          # release.tag_name has a v prepended. we must remove it.
+          else
+            echo "release event detected"
+            with_v=${{github.event.release.tag_name}}
+            without_v=${with_v:1}
+            local_release_id=$without_v
+            echo "::set-output name=release_id::$without_v"
+            echo "::set-output name=open_prs::yes"
+          fi
+
+          # string manipulation to get the branch name. It can't be discovered from the github api
+          # for release triggers so we're stuck with this. If we change our branch naming strategy
+          # we have to update this code. example: 1.0.0 -> 1.0.latest. the sed command takes into
+          # account multiple digits like 1.0.999 -> 1.0.latest
+
+          no_patch=$(sed "s|\(.*\)\..*|\1|" <<< $local_release_id)
+          branch_name="${no_patch}.latest"
+          echo "::set-output name=release_branch::$branch_name"
+          echo "release branch is inferred to be ${branch_name}"
+
+      # attempts to access a previously cached runner
+      - uses: actions/cache@v2
+        id: cache
+        with:
+          path: ${{ env.RUNNER_CACHE_PATH }}
+          key: ${{ steps.variables.outputs.cache_key }}
+
+      - name: Fetch Rust Toolchain
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Add fmt
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: rustup component add rustfmt
+
+      - name: Cargo fmt
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: fmt
+          args: --manifest-path performance/runner/Cargo.toml --all -- --check
+
+      - name: Test
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --manifest-path performance/runner/Cargo.toml
+
+      - name: Build (optimized)
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --release --manifest-path performance/runner/Cargo.toml
+      # the cache action automatically caches this binary at the end of the job
+
+  model:
+    # depends on `latest-runner` as a separate job so that failures in this job do not prevent
+    # a successfully tested and built binary from being cached.
+    needs: [latest-runner]
+    name: Model a release
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: '[DEBUG] print variables'
+        run: |
+          echo "all variables defined in latest-runner > Set Variables > outputs"
+          echo "cache_key:      ${{ needs.latest-runner.outputs.cache_key }}"
+          echo "release_id:     ${{ needs.latest-runner.outputs.release_id }}"
+          echo "open_prs:       ${{ needs.latest-runner.outputs.open_prs }}"
+          echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"
+
+      - name: Setup Python
+        uses: actions/setup-python@v2.2.2
+        with:
+          python-version: "3.8"
+
+      - name: Install dbt
+        run: pip install dbt-postgres==${{ needs.latest-runner.outputs.release_id }}
+
+      - name: Install Hyperfine
+        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
+
+      # explicitly checkout main to get the latest project definitions
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          ref: main
+
+      # this was built in the previous job so it will be there.
+      - name: Fetch Runner
+        uses: actions/cache@v2
+        id: cache
+        with:
+          path: ${{ env.RUNNER_CACHE_PATH }}
+          key: ${{ needs.latest-runner.outputs.cache_key }}
+
+      - name: Move Runner
+        run: mv performance/runner/target/release/runner performance/app
+
+      - name: Change Runner Permissions
+        run: chmod +x ./performance/app
+
+      - name: '[DEBUG] ls baseline directory before run'
+        run: ls -R performance/baselines/
+
+      # `${{ github.workspace }}` is used to pass the absolute path
+      # TODO CHANGE NUMBER OF RUNS BEFORE MERGING
+      # TODO this isn't putting the baseline in the right directory. it's putting it one level up.
+      - name: Run Measurement
+        run: mkdir ${{ github.workspace }}/performance/tmp/ && mkdir -p performance/baselines/${{ needs.latest-runner.outputs.release_id }}/ && performance/app model -v ${{ needs.latest-runner.outputs.release_id }} -b ${{ github.workspace }}/performance/baselines/ -p ${{ github.workspace }}/performance/projects/ -t ${{ github.workspace }}/performance/tmp/ -n 2
+
+      - name: '[DEBUG] ls baseline directory after run'
+        run: ls -R performance/baselines/
+
+      - uses: actions/upload-artifact@v3
+        with:
+          name: baseline
+          path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}/
+
+  pr-release-branch:
+    if: ${{ needs.latest-runner.outputs.open_prs == 'yes' && needs.latest-runner.outputs.release_branch != 'main' }}
+    # depends on `model` as a separate job so that the baseline can be committed to more than one branch
+    # i.e. release branch and main
+    needs: [latest-runner, model]
+    name: Open PR for release branch (if specified)
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: '[DEBUG] print variables'
+        run: |
+          echo "all variables defined in latest-runner > Set Variables > outputs"
+          echo "cache_key:      ${{ needs.latest-runner.outputs.cache_key }}"
+          echo "release_id:     ${{ needs.latest-runner.outputs.release_id }}"
+          echo "open_prs:       ${{ needs.latest-runner.outputs.open_prs }}"
+          echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"
+
+
+      # explicitly checkout the branch specified during dispatch
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          ref: ${{ needs.latest-runner.outputs.release_branch }}
+
+      - name: '[DEBUG] ls baselines before artifact download'
+        run: ls -R performance/baselines/
+
+      - uses: actions/download-artifact@v3
+        with:
+          name: baseline
+          path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}
+
+      - name: '[DEBUG] ls baselines after artifact download'
+        run: ls -R performance/baselines/
+
+      - name: Make Branch
+        uses: EndBug/add-and-commit@v8
+        with:
+          add: performance/baselines
+          author_name: 'Github Build Bot'
+          author_email: 'buildbot@fishtownanalytics.com'
+          message: 'adding performance baseline for ${{ needs.latest-runner.outputs.release_id }}'
+          new_branch: 'performance-bot/release_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
+          push: false
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v3
+        with:
+          author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
+          draft: true
+          base: ${{ needs.latest-runner.outputs.release_branch }}
+          title: 'Adding performance modeling for ${{needs.latest-runner.outputs.release_id}}'
+          branch: 'performance-bot/main_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
+
+
+  pr-main-branch:
+    if: ${{ needs.latest-runner.outputs.open_prs == 'yes' }}
+    # depends on `model` as a separate job so that the baseline can be committed to more than one branch
+    # i.e. release branch and main
+    needs: [latest-runner, model]
+    name: Open PR for main (if specified)
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: '[DEBUG] print variables'
+        run: |
+          echo "all variables defined in latest-runner > Set Variables > outputs"
+          echo "cache_key:      ${{ needs.latest-runner.outputs.cache_key }}"
+          echo "release_id:     ${{ needs.latest-runner.outputs.release_id }}"
+          echo "open_prs:       ${{ needs.latest-runner.outputs.open_prs }}"
+          echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"
+
+      # explicitly checkout main
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          ref: main
+
+      - name: '[DEBUG] ls baselines before artifact download'
+        run: ls -R performance/baselines/
+
+      - uses: actions/download-artifact@v3
+        with:
+          name: baseline
+          path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}
+
+      - name: '[DEBUG] ls baselines after artifact download'
+        run: ls -R performance/baselines/
+
+      - name: Make Branch
+        uses: EndBug/add-and-commit@v8
+        with:
+          add: performance/baselines
+          author_name: 'Github Build Bot'
+          author_email: 'buildbot@fishtownanalytics.com'
+          message: 'adding performance baseline for ${{ needs.latest-runner.outputs.release_id }}'
+          new_branch: 'performance-bot/main_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
+          push: false
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v3
+        with:
+          author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
+          draft: true
+          base: main
+          title: 'Adding performance modeling for ${{needs.latest-runner.outputs.release_id}}'
+          branch: 'performance-bot/main_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
--- a/.github/workflows/sample_performance.yml
+++ b/.github/workflows/sample_performance.yml
@@ -0,0 +1,138 @@
+# **what?**
+# This workflow samples performance characteristics of your commit and compares them to
+# the most recent release. If they are significanly off from the previously recorded
+# distribution it will trigger a failure. Do not rerun these failures to get them to pass.
+# There is more information in the performance readme about how to handle failures.
+#
+# **why?**
+# This will help us potentially catch new performance regressions in development before
+# releasing a new version.
+#
+# **whent?**
+# This runs on every commit in PRs.
+#
+
+
+name: Performance Regression Tests
+# Schedule triggers
+on:
+  # sampling is fast enough to run on every commit in PRs
+  pull_request:
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+env:
+  RUNNER_CACHE_PATH: performance/runner/target/release/runner
+
+jobs:
+  latest-runner:
+    name: Build or Fetch Performance Runner
+    runs-on: ubuntu-latest
+    env:
+      RUSTFLAGS: "-D warnings"
+    steps:
+
+      # specifically checksout main so that when we're sampling on commits to
+      # patch releases, we're using the latest runner code not whatever is in the working branch.
+      #
+      # the with clause should be commented out if you're working on the runner
+      # and want to see output from your code. it's pulling from main, not what you're
+      # working on.
+      - name: Checkout
+        uses: actions/checkout@v2
+        # TODO uncomment when done developing this.
+        # with:
+        #   ref: main
+
+      # create a cache key that will be used in the next job. without this the
+      # next job would have to checkout from main an hash the files itself.
+      - name: Create Cache Key
+        id: cacheKey
+        run: echo "::set-output name=key::${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml')}}-${{ hashFiles('performance/runner/src/*') }}"
+        working-directory: ${{ env.RUNNER_CACHE_PATH }}
+
+      # attempts to access a previously cached runner
+      #
+      # unless you're developing the runner, it should be in the cache.
+      - uses: actions/cache@v2
+        id: cache
+        with:
+          path: ${{ env.RUNNER_CACHE_PATH }}
+          key: ${{ steps.cacheKey.outputs.key }}
+
+      - name: Fetch Rust Toolchain
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Add fmt
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: rustup component add rustfmt
+
+      - name: Cargo fmt
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: fmt
+          args: --manifest-path performance/runner/Cargo.toml --all -- --check
+
+      - name: Test
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --manifest-path performance/runner/Cargo.toml
+
+      - name: Build (optimized)
+        if: steps.cache.outputs.cache-hit != 'true'
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --release --manifest-path performance/runner/Cargo.toml
+      # the cache action automatically caches this binary at the end of the job
+
+  sample:
+    # depends on `latest-runner` as a separate job so that failures in this job do not prevent
+    # a successfully tested and built binary from being cached. Also so we can checkout dbt
+    # from THIS branch not main like we want for the runner.
+    needs: [latest-runner]
+    name: Compare Performance Samples
+    runs-on: ubuntu-latest
+    steps:
+
+      # checkout this branch not main
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v2.2.2
+        with:
+          python-version: "3.8"
+
+      - name: Install dbt
+        run: pip install -r dev-requirements.txt -r editable-requirements.txt
+
+      - name: Install Hyperfine
+        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
+
+      # this was built in the previous job so it will be there.
+      - name: Fetch Runner
+        uses: actions/cache@v2
+        id: cache
+        with:
+          path: ${{ env.RUNNER_CACHE_PATH }}
+          key: ${{ steps.cacheKey.outputs.key }}
+
+      - name: Move Runner
+        run: mv performance/runner/target/release/runner ./performance/app
+
+      - name: Change Runner Permissions
+        run: chmod +x ./performance/app
+
+      # `${{ github.workspace }}` is used to pass the absolute path
+      - name: Run Measurement
+        run: mkdir tmp && ./app sample -b ${{ github.workspace }}/performance/baselines -p ${{ github.workspace }}/performance/projects -o ${{ github.workspace }}/tmp
+        working-directory: ${{ github.workspace }}/performance/
--- a/performance/baselines/1.0.3/parse___2000_models.json
+++ b/performance/baselines/1.0.3/parse___2000_models.json
@@ -0,0 +1,40 @@
+{
+  "version": "1.0.3",
+  "metric": {
+    "name": "parse",
+    "project_name": "01_2000_simple_models"
+  },
+  "ts": "2022-03-04T00:02:52.657727515Z",
+  "measurement": {
+    "command": "dbt parse --no-version-check --profiles-dir ../../project_config/",
+    "mean": 41.224566760615,
+    "stddev": 0.252468634424254,
+    "median": 41.182836243915,
+    "user": 40.70073678499999,
+    "system": 0.61185062,
+    "min": 40.89372129691501,
+    "max": 41.68176405591501,
+    "times": [
+      41.397582801915,
+      41.618822256915,
+      41.374914350915,
+      41.68176405591501,
+      41.255119986915,
+      41.528348636915,
+      41.238762892915,
+      40.950121934915,
+      41.388716648915,
+      41.62938069991501,
+      41.139914502915,
+      41.114225200915,
+      41.045012222915,
+      41.01039839391501,
+      40.915296414915,
+      41.006528646915,
+      40.89372129691501,
+      40.951454721915,
+      41.125491559915,
+      41.225757984915
+    ]
+  }
+}
Author	SHA1	Message	Date
Nathaniel May	0a2dbf3721	add manually modeled 1.0.3 baseline	2022-03-16 12:10:02 -04:00
Nathaniel May	a138f5f42b	init sampling action	2022-03-16 11:50:20 -04:00
Nathaniel May	c5672b564b	init modeling action	2022-03-16 11:49:09 -04:00