mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-20 06:51:29 +00:00
Compare commits
3 Commits
enable-pos
...
nate/regre
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0a2dbf3721 | ||
|
|
a138f5f42b | ||
|
|
c5672b564b |
310
.github/workflows/model_performance.yml
vendored
Normal file
310
.github/workflows/model_performance.yml
vendored
Normal file
@@ -0,0 +1,310 @@
|
||||
# **what?**
|
||||
# This workflow models the performance characteristics of a point in time in dbt.
|
||||
# It runs specific dbt commands on committed projects multiple times to create and
|
||||
# commit information about the distribution to the current branch. For more information
|
||||
# see the readme in the performance module at /performance/README.md.
|
||||
#
|
||||
# **why?**
|
||||
# When developing new features, we can take quick performance samples and compare
|
||||
# them against the committed baseline measurements produced by this workflow to detect
|
||||
# some performance regressions at development time before they reach users.
|
||||
#
|
||||
# **when?**
|
||||
# This is only run once directly after each release. If for some reason the results of
|
||||
# a run are not satisfactory, it can also be triggered manually.
|
||||
|
||||
name: Model Performance Characteristics

on:
  # runs after non-prereleases are published.
  release:
    types: [released]
  # run manually from the actions tab
  workflow_dispatch:
    inputs:
      # if we ever want to model pre-releases, we would need to handle pre-release version numbers
      # here and in the runner. Even if we use a semver library I suspect it will be rather difficult.
      release_id:
        description: '(^^ always run from main) dbt version to model (must be non-prerelease in Pypi)'
        # quoted so the value is unambiguously a string, never a number
        default: '9.9.9'
        required: true
      open_prs:
        description: 'Open PRs to main and release branch? (branch name inferred from provided version) (yes/no)'
        # quoted: a bare `no` is a boolean to YAML 1.1 parsers
        default: 'no'
        required: true

env:
  # location of the compiled runner binary; this is the path that gets cached
  RUNNER_CACHE_PATH: performance/runner/target/release/runner

# Declaring any permission sets every unlisted permission to `none`, so each
# scope the jobs rely on has to be listed here:
#   contents      - committing the generated baselines to a new branch
#   pull-requests - opening the draft PRs in the pr-* jobs
permissions:
  contents: write
  pull-requests: write
|
||||
|
||||
jobs:
|
||||
latest-runner:
  name: Build or Fetch Runner
  runs-on: ubuntu-latest
  env:
    # deny warnings: any compiler warning fails the build
    RUSTFLAGS: "-D warnings"
  # everything the downstream jobs need is published from the `variables` step
  outputs:
    cache_key: ${{ steps.variables.outputs.cache_key }}
    release_id: ${{ steps.variables.outputs.release_id }}
    open_prs: ${{ steps.variables.outputs.open_prs }}
    release_branch: ${{ steps.variables.outputs.release_branch }}
  steps:

    # explicitly checkout the performance runner from main regardless of which
    # version we are modeling.
    - name: Checkout
      uses: actions/checkout@v2
      with:
        ref: main

    # collect all the variables that need to be used in subsequent jobs
    - name: Set Variables
      id: variables
      # Expression results are passed through the environment instead of being
      # pasted into the script text, so a crafted input or tag name is treated
      # as data by the shell rather than as code.
      env:
        CACHE_KEY: ${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml') }}-${{ hashFiles('performance/runner/src/*') }}
        DISPATCH_RELEASE_ID: ${{ github.event.inputs.release_id }}
        DISPATCH_OPEN_PRS: ${{ github.event.inputs.open_prs }}
        RELEASE_TAG: ${{ github.event.release.tag_name }}
      run: |
        # create a cache key that will be used in the next job. without this the
        # next job would have to checkout from main and hash the files itself.
        # Outputs are written to $GITHUB_OUTPUT; the `::set-output` workflow
        # command is deprecated.
        echo "cache_key=${CACHE_KEY}" >> "$GITHUB_OUTPUT"

        # this value gets used to create other values locally so it gets a local definition too.
        local_release_id=''

        # users are prompted to input with the correct format
        if [[ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]]; then
          echo "Workflow dispatch event detected"
          local_release_id="$DISPATCH_RELEASE_ID"
          echo "release_id=${DISPATCH_RELEASE_ID}" >> "$GITHUB_OUTPUT"
          echo "open_prs=${DISPATCH_OPEN_PRS}" >> "$GITHUB_OUTPUT"
        else
          echo "release event detected"
          # release.tag_name has a v prepended. we must remove it.
          # `#v` strips the prefix only when it is actually present.
          without_v="${RELEASE_TAG#v}"
          local_release_id="$without_v"
          echo "release_id=${without_v}" >> "$GITHUB_OUTPUT"
          echo "open_prs=yes" >> "$GITHUB_OUTPUT"
        fi

        # string manipulation to get the branch name. It can't be discovered from the github api
        # for release triggers so we're stuck with this. If we change our branch naming strategy
        # we have to update this code. example: 1.0.0 -> 1.0.latest. the sed command takes into
        # account multiple digits like 1.0.999 -> 1.0.latest
        no_patch=$(sed "s|\(.*\)\..*|\1|" <<< "$local_release_id")
        branch_name="${no_patch}.latest"
        echo "release_branch=${branch_name}" >> "$GITHUB_OUTPUT"
        echo "release branch is inferred to be ${branch_name}"

    # attempts to access a previously cached runner
    - uses: actions/cache@v2
      id: cache
      with:
        path: ${{ env.RUNNER_CACHE_PATH }}
        key: ${{ steps.variables.outputs.cache_key }}

    # every remaining step only runs when the cache did not already hold a runner
    - name: Fetch Rust Toolchain
      if: steps.cache.outputs.cache-hit != 'true'
      uses: actions-rs/toolchain@v1
      with:
        profile: minimal
        toolchain: stable
        override: true

    - name: Add fmt
      if: steps.cache.outputs.cache-hit != 'true'
      run: rustup component add rustfmt

    - name: Cargo fmt
      if: steps.cache.outputs.cache-hit != 'true'
      uses: actions-rs/cargo@v1
      with:
        command: fmt
        args: --manifest-path performance/runner/Cargo.toml --all -- --check

    - name: Test
      if: steps.cache.outputs.cache-hit != 'true'
      uses: actions-rs/cargo@v1
      with:
        command: test
        args: --manifest-path performance/runner/Cargo.toml

    - name: Build (optimized)
      if: steps.cache.outputs.cache-hit != 'true'
      uses: actions-rs/cargo@v1
      with:
        command: build
        args: --release --manifest-path performance/runner/Cargo.toml
    # the cache action automatically caches this binary at the end of the job
|
||||
|
||||
model:
  # Runs as its own job, after `latest-runner`, so that a failure while
  # measuring cannot stop a tested and built runner binary from being cached.
  needs: [latest-runner]
  name: Model a release
  runs-on: ubuntu-latest
  steps:

    - name: '[DEBUG] print variables'
      run: |
        echo "all variables defined in latest-runner > Set Variables > outputs"
        echo "cache_key: ${{ needs.latest-runner.outputs.cache_key }}"
        echo "release_id: ${{ needs.latest-runner.outputs.release_id }}"
        echo "open_prs: ${{ needs.latest-runner.outputs.open_prs }}"
        echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"

    - name: Setup Python
      uses: actions/setup-python@v2.2.2
      with:
        python-version: "3.8"

    # installs the published release that is being modeled
    - name: Install dbt
      run: pip install dbt-postgres==${{ needs.latest-runner.outputs.release_id }}

    - name: Install Hyperfine
      run: >-
        wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb
        && sudo dpkg -i hyperfine_1.11.0_amd64.deb

    # main is checked out explicitly so the latest project definitions are used
    - name: Checkout
      uses: actions/checkout@v2
      with:
        ref: main

    # restores the binary that the `latest-runner` job built or found in cache
    - name: Fetch Runner
      uses: actions/cache@v2
      id: cache
      with:
        path: ${{ env.RUNNER_CACHE_PATH }}
        key: ${{ needs.latest-runner.outputs.cache_key }}

    - name: Move Runner
      run: mv performance/runner/target/release/runner performance/app

    - name: Change Runner Permissions
      run: chmod +x ./performance/app

    - name: '[DEBUG] ls baseline directory before run'
      run: ls -R performance/baselines/

    # absolute paths are built from `${{ github.workspace }}`
    # TODO CHANGE NUMBER OF RUNS BEFORE MERGING
    # TODO this isn't putting the baseline in the right directory. it's putting it one level up.
    - name: Run Measurement
      run: >-
        mkdir ${{ github.workspace }}/performance/tmp/
        && mkdir -p performance/baselines/${{ needs.latest-runner.outputs.release_id }}/
        && performance/app model
        -v ${{ needs.latest-runner.outputs.release_id }}
        -b ${{ github.workspace }}/performance/baselines/
        -p ${{ github.workspace }}/performance/projects/
        -t ${{ github.workspace }}/performance/tmp/
        -n 2

    - name: '[DEBUG] ls baseline directory after run'
      run: ls -R performance/baselines/

    # hands the new baseline directory to the PR jobs
    - uses: actions/upload-artifact@v3
      with:
        name: baseline
        path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}/
|
||||
|
||||
pr-release-branch:
  # skipped unless PRs were requested; also skipped when the inferred release
  # branch is main, since `pr-main-branch` already covers that case
  if: ${{ needs.latest-runner.outputs.open_prs == 'yes' && needs.latest-runner.outputs.release_branch != 'main' }}
  # depends on `model` as a separate job so that the baseline can be committed to more than one branch
  # i.e. release branch and main
  needs: [latest-runner, model]
  name: Open PR for release branch (if specified)
  runs-on: ubuntu-latest
  steps:

    - name: '[DEBUG] print variables'
      run: |
        echo "all variables defined in latest-runner > Set Variables > outputs"
        echo "cache_key: ${{ needs.latest-runner.outputs.cache_key }}"
        echo "release_id: ${{ needs.latest-runner.outputs.release_id }}"
        echo "open_prs: ${{ needs.latest-runner.outputs.open_prs }}"
        echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"

    # explicitly checkout the release branch inferred from the version
    - name: Checkout
      uses: actions/checkout@v2
      with:
        ref: ${{ needs.latest-runner.outputs.release_branch }}

    - name: '[DEBUG] ls baselines before artifact download'
      run: ls -R performance/baselines/

    # pulls in the baseline uploaded by the `model` job
    - uses: actions/download-artifact@v3
      with:
        name: baseline
        path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}

    - name: '[DEBUG] ls baselines after artifact download'
      run: ls -R performance/baselines/

    # commits locally only (push: false); the next step publishes the branch
    - name: Make Branch
      uses: EndBug/add-and-commit@v8
      with:
        add: performance/baselines
        author_name: 'Github Build Bot'
        author_email: 'buildbot@fishtownanalytics.com'
        message: 'adding performance baseline for ${{ needs.latest-runner.outputs.release_id }}'
        new_branch: 'performance-bot/release_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
        push: false

    - name: Create Pull Request
      uses: peter-evans/create-pull-request@v3
      with:
        author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
        draft: true
        base: ${{ needs.latest-runner.outputs.release_branch }}
        title: 'Adding performance modeling for ${{needs.latest-runner.outputs.release_id}}'
        # must match `new_branch` above. The `main_` prefix belongs to the
        # `pr-main-branch` job; reusing it here would make both jobs of the
        # same run target one PR branch.
        branch: 'performance-bot/release_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
|
||||
|
||||
|
||||
pr-main-branch:
  # only runs when PRs were requested
  if: needs.latest-runner.outputs.open_prs == 'yes'
  # A separate job from `model`, so that one measured baseline can be
  # committed to more than one branch (the release branch and main).
  needs: [latest-runner, model]
  name: Open PR for main (if specified)
  runs-on: ubuntu-latest
  steps:

    - name: '[DEBUG] print variables'
      run: |
        echo "all variables defined in latest-runner > Set Variables > outputs"
        echo "cache_key: ${{ needs.latest-runner.outputs.cache_key }}"
        echo "release_id: ${{ needs.latest-runner.outputs.release_id }}"
        echo "open_prs: ${{ needs.latest-runner.outputs.open_prs }}"
        echo "release_branch: ${{ needs.latest-runner.outputs.release_branch }}"

    # main is checked out explicitly
    - name: Checkout
      uses: actions/checkout@v2
      with:
        ref: main

    - name: '[DEBUG] ls baselines before artifact download'
      run: ls -R performance/baselines/

    # fetches the baseline that the `model` job uploaded
    - uses: actions/download-artifact@v3
      with:
        name: baseline
        path: performance/baselines/${{ needs.latest-runner.outputs.release_id }}

    - name: '[DEBUG] ls baselines after artifact download'
      run: ls -R performance/baselines/

    # local commit on a new branch; nothing is pushed by this step
    - name: Make Branch
      uses: EndBug/add-and-commit@v8
      with:
        add: performance/baselines
        author_name: 'Github Build Bot'
        author_email: 'buildbot@fishtownanalytics.com'
        message: 'adding performance baseline for ${{ needs.latest-runner.outputs.release_id }}'
        new_branch: 'performance-bot/main_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
        push: false

    # opens a draft PR against main from the branch named above
    - name: Create Pull Request
      uses: peter-evans/create-pull-request@v3
      with:
        author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
        draft: true
        base: main
        title: 'Adding performance modeling for ${{needs.latest-runner.outputs.release_id}}'
        branch: 'performance-bot/main_${{needs.latest-runner.outputs.release_id}}_${{GITHUB.RUN_ID}}'
|
||||
138
.github/workflows/sample_performance.yml
vendored
Normal file
138
.github/workflows/sample_performance.yml
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
# **what?**
|
||||
# This workflow samples performance characteristics of your commit and compares them to
|
||||
# the most recent release. If they are significantly off from the previously recorded
|
||||
# distribution it will trigger a failure. Do not rerun these failures to get them to pass.
|
||||
# There is more information in the performance readme about how to handle failures.
|
||||
#
|
||||
# **why?**
|
||||
# This will help us potentially catch new performance regressions in development before
|
||||
# releasing a new version.
|
||||
#
|
||||
# **when?**
|
||||
# This runs on every commit in PRs.
|
||||
#
|
||||
|
||||
|
||||
name: Performance Regression Tests

# Triggers
on:
  # sampling is quick enough to run for every commit pushed to a PR
  pull_request:
  # also runnable by hand from the Actions tab
  workflow_dispatch:

env:
  # where the compiled runner binary lives; this is the path that gets cached
  RUNNER_CACHE_PATH: performance/runner/target/release/runner
|
||||
|
||||
jobs:
  latest-runner:
    name: Build or Fetch Performance Runner
    runs-on: ubuntu-latest
    env:
      # deny warnings: any compiler warning fails the build
      RUSTFLAGS: "-D warnings"
    # The `steps` context is only visible inside the job that ran the step, so
    # the cache key is published as a job output for the `sample` job to read.
    outputs:
      cache_key: ${{ steps.cacheKey.outputs.key }}
    steps:

      # specifically checks out main so that when we're sampling on commits to
      # patch releases, we're using the latest runner code not whatever is in the working branch.
      #
      # the with clause should be commented out if you're working on the runner
      # and want to see output from your code. it's pulling from main, not what you're
      # working on.
      - name: Checkout
        uses: actions/checkout@v2
        # TODO uncomment when done developing this.
        # with:
        #   ref: main

      # create a cache key that will be used in the next job. without this the
      # next job would have to checkout from main and hash the files itself.
      #
      # No `working-directory` here: RUNNER_CACHE_PATH is the path of the built
      # binary, which does not exist yet at this point, and `hashFiles` resolves
      # its patterns against the workspace root anyway.
      # The output is written to $GITHUB_OUTPUT; `::set-output` is deprecated.
      - name: Create Cache Key
        id: cacheKey
        run: echo "key=${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml') }}-${{ hashFiles('performance/runner/src/*') }}" >> "$GITHUB_OUTPUT"

      # attempts to access a previously cached runner
      #
      # unless you're developing the runner, it should be in the cache.
      - uses: actions/cache@v2
        id: cache
        with:
          path: ${{ env.RUNNER_CACHE_PATH }}
          key: ${{ steps.cacheKey.outputs.key }}

      # every remaining step only runs when the cache did not already hold a runner
      - name: Fetch Rust Toolchain
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      - name: Add fmt
        if: steps.cache.outputs.cache-hit != 'true'
        run: rustup component add rustfmt

      - name: Cargo fmt
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path performance/runner/Cargo.toml --all -- --check

      - name: Test
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --manifest-path performance/runner/Cargo.toml

      - name: Build (optimized)
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --release --manifest-path performance/runner/Cargo.toml
      # the cache action automatically caches this binary at the end of the job

  sample:
    # depends on `latest-runner` as a separate job so that failures in this job do not prevent
    # a successfully tested and built binary from being cached. Also so we can checkout dbt
    # from THIS branch not main like we want for the runner.
    needs: [latest-runner]
    name: Compare Performance Samples
    runs-on: ubuntu-latest
    steps:

      # checkout this branch not main
      - name: Checkout
        uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: "3.8"

      # installs dbt from the checked-out branch's requirement files
      - name: Install dbt
        run: pip install -r dev-requirements.txt -r editable-requirements.txt

      - name: Install Hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb

      # this was built in the previous job so it will be there.
      # The key comes from the `latest-runner` job output; that job's `steps`
      # context cannot be read from here.
      - name: Fetch Runner
        uses: actions/cache@v2
        id: cache
        with:
          path: ${{ env.RUNNER_CACHE_PATH }}
          key: ${{ needs.latest-runner.outputs.cache_key }}

      - name: Move Runner
        run: mv performance/runner/target/release/runner ./performance/app

      - name: Change Runner Permissions
        run: chmod +x ./performance/app

      # `${{ github.workspace }}` is used to pass the absolute path
      # NOTE(review): `mkdir tmp` runs inside performance/ and so creates
      # performance/tmp, while `-o` points at <workspace>/tmp. Confirm which
      # directory the runner is meant to write to.
      - name: Run Measurement
        run: mkdir tmp && ./app sample -b ${{ github.workspace }}/performance/baselines -p ${{ github.workspace }}/performance/projects -o ${{ github.workspace }}/tmp
        working-directory: ${{ github.workspace }}/performance/
|
||||
40
performance/baselines/1.0.3/parse___2000_models.json
Normal file
40
performance/baselines/1.0.3/parse___2000_models.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"version": "1.0.3",
|
||||
"metric": {
|
||||
"name": "parse",
|
||||
"project_name": "01_2000_simple_models"
|
||||
},
|
||||
"ts": "2022-03-04T00:02:52.657727515Z",
|
||||
"measurement": {
|
||||
"command": "dbt parse --no-version-check --profiles-dir ../../project_config/",
|
||||
"mean": 41.224566760615,
|
||||
"stddev": 0.252468634424254,
|
||||
"median": 41.182836243915,
|
||||
"user": 40.70073678499999,
|
||||
"system": 0.61185062,
|
||||
"min": 40.89372129691501,
|
||||
"max": 41.68176405591501,
|
||||
"times": [
|
||||
41.397582801915,
|
||||
41.618822256915,
|
||||
41.374914350915,
|
||||
41.68176405591501,
|
||||
41.255119986915,
|
||||
41.528348636915,
|
||||
41.238762892915,
|
||||
40.950121934915,
|
||||
41.388716648915,
|
||||
41.62938069991501,
|
||||
41.139914502915,
|
||||
41.114225200915,
|
||||
41.045012222915,
|
||||
41.01039839391501,
|
||||
40.915296414915,
|
||||
41.006528646915,
|
||||
40.89372129691501,
|
||||
40.951454721915,
|
||||
41.125491559915,
|
||||
41.225757984915
|
||||
]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user