mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-19 20:01:29 +00:00
Compare commits
14 Commits
enable-pos
...
paw/perf-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce827c658a | ||
|
|
70afe559c6 | ||
|
|
4b2a84872b | ||
|
|
aafb017d5f | ||
|
|
98746ead28 | ||
|
|
03aab3ab98 | ||
|
|
c8b911b400 | ||
|
|
72696ab176 | ||
|
|
b661ad586c | ||
|
|
8bbbf6588b | ||
|
|
b7943f3372 | ||
|
|
52456b2ff7 | ||
|
|
85f9fd7251 | ||
|
|
99ef0dd79c |
6
.changes/unreleased/Under the Hood-20231122-124612.yaml
Normal file
6
.changes/unreleased/Under the Hood-20231122-124612.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
kind: Under the Hood
|
||||
body: Added a GitHub action for checking performance characteristics of dbt.
|
||||
time: 2023-11-22T12:46:12.16794-05:00
|
||||
custom:
|
||||
Author: peterallenwebb
|
||||
Issue: "8323"
|
||||
59
.github/workflows/perf-check.yml
vendored
Normal file
59
.github/workflows/perf-check.yml
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
# **what?**
# This workflow uses a python script to check the performance of dbt against
# baselines on a set of benchmark projects.
#
# It sets up dbt with `make dev`, writes a dummy postgres profile to
# ~/.dbt/profiles.yml, runs `scripts/perf-check.py baseline`, and uploads the
# resulting `perf_check.json` as the `perf-check-result` artifact.

name: Performance Check

on:
  pull_request:
  workflow_dispatch:

jobs:
  build-and-perf-check:
    name: Build dbt and check performance

    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Check out the repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'

      - name: Install Python dependencies and install dbt
        run: |
          python -m pip install --user --upgrade pip
          python -m pip --version
          make dev
          mypy --version
          dbt --version

      # The profile must be nested (profile -> outputs -> target -> settings),
      # so the leading spaces inside the quoted strings are significant.
      - name: Create profile
        run: |
          mkdir -p "$HOME/.dbt"
          echo "default:" > $HOME/.dbt/profiles.yml
          echo "  outputs:" >> $HOME/.dbt/profiles.yml
          echo "    dev:" >> $HOME/.dbt/profiles.yml
          echo "      dbname: dummy" >> $HOME/.dbt/profiles.yml
          echo "      host: localhost" >> $HOME/.dbt/profiles.yml
          echo "      password: paswd" >> $HOME/.dbt/profiles.yml
          echo "      port: 5432" >> $HOME/.dbt/profiles.yml
          echo "      schema: dummy" >> $HOME/.dbt/profiles.yml
          echo "      threads: 4" >> $HOME/.dbt/profiles.yml
          echo "      type: postgres" >> $HOME/.dbt/profiles.yml
          echo "      user: dummy" >> $HOME/.dbt/profiles.yml
          echo "  target: dev" >> $HOME/.dbt/profiles.yml
          cat $HOME/.dbt/profiles.yml

      - name: Run performance script
        run: |
          python ./scripts/perf-check.py baseline

      # NOTE(review): GitHub has deprecated v3 of upload-artifact; confirm
      # whether this should move to v4 before relying on the artifact.
      - uses: actions/upload-artifact@v3
        with:
          name: perf-check-result
          path: ./perf_check.json
|
||||
129
scripts/perf-check.py
Normal file
129
scripts/perf-check.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
# Benchmark projects, keyed by project name. Each entry may contain:
#   "name":    the project name (matches its key in the entries below)
#   "path":    directory the project's jobs are run in; when absent, jobs are
#              run in a directory named after the project key
#   "git_url": optional repository that is cloned (into a directory named
#              after the project key) before any job runs
#   "jobs":    mapping of job name -> {"command": argument list to execute}
# Job names are used as the keys of the results file, so they should be unique
# across all projects.
projects = {
    # example of how to use project in separate public repo, not used yet.
    # "jaffle_shop": {
    #     "name": "jaffle_shop",
    #     "git_url": "https://github.com/dbt-labs/jaffle_shop.git",
    #     "jobs": {
    #         "jaffle_shop__parse_no_partial": {
    #             "command": ["dbt", "parse", "--no-partial-parse"],
    #         },
    #     }
    # },
    "simple_models": {
        "name": "simple_models",
        "path": "./performance/projects/01_2000_simple_models",
        "jobs": {
            # Full parse, ignoring any partial-parse state.
            "simple_models__parse_no_partial": {
                "command": ["dbt", "parse", "--no-partial-parse"],
            },
            # Plain parse; listed after the job above, so it runs second.
            "simple_models__second_parse": {
                "command": ["dbt", "parse"],
            },
        },
    },
}
|
||||
|
||||
|
||||
def print_usage() -> None:
    """Report an invalid invocation and list the supported commands.

    The first line of output is still ``invalid usage``; the following lines
    describe the two invocations the script's command-line dispatch accepts.
    """
    print("invalid usage")
    print("usage: perf-check.py baseline")
    print("       perf-check.py compare <baseline_file> <result_file>")
|
||||
|
||||
|
||||
def git_checkout(repo: str, path: pathlib.Path, commit: str = None) -> None:
    """Make sure a clone of ``repo`` exists at ``path``, optionally at ``commit``.

    Args:
        repo: URL (or local path) of the git repository to clone.
        path: Directory the clone lives in. If it already exists, the clone
            step is skipped and the directory is used as-is.
        commit: Commit-ish to check out inside ``path``. When ``None`` or
            empty, no checkout is performed.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``git checkout``
            exits with a non-zero status.
    """
    if not os.path.exists(path):
        print(f"Didn't find path {path}. Cloning {repo} into {path}.")
        # Output is captured rather than echoed; it is attached to the
        # CalledProcessError raised on failure.
        subprocess.run(["git", "clone", repo, path], capture_output=True, check=True)
    else:
        print(f"Found path {path}. Skipping clone of {repo}.")

    if commit:
        print(f"Checking out commit {commit} for repo {repo}")
        subprocess.run(
            ["git", "checkout", commit], cwd=path, capture_output=True, check=True
        )
|
||||
|
||||
|
||||
def prepare_projects(projects) -> None:
    """Clone every project that declares a ``git_url``.

    Each such project is cloned into a directory named after its key in
    ``projects``. Projects without a ``git_url`` are left untouched.
    """
    remote_projects = (
        (key, spec["git_url"]) for key, spec in projects.items() if "git_url" in spec
    )
    for key, url in remote_projects:
        git_checkout(url, key)
|
||||
|
||||
|
||||
def run_jobs(projects):
    """Execute every job of every project and time it.

    Returns a dict keyed by job name. A job whose command exits with status 0
    maps to ``{"succeeded": True, "time": <elapsed seconds>}``; any other job
    maps to ``{"succeeded": False}``.
    """
    outcomes = {}

    for project_key, spec in projects.items():
        # Jobs run in the configured "path", or in a directory named after
        # the project when no path is given.
        workdir = spec.get("path", project_key)

        for job_name, job in spec["jobs"].items():
            print(f"running job {job_name}")

            started = time.perf_counter()
            proc = subprocess.run(job["command"], cwd=workdir)
            finished = time.perf_counter()

            if proc.returncode == 0:
                outcomes[job_name] = {"succeeded": True, "time": finished - started}
            else:
                outcomes[job_name] = {"succeeded": False}

    return outcomes
|
||||
|
||||
|
||||
def _format_seconds(seconds) -> str:
    """Render a duration as e.g. ``1.5s``, or ``?`` when no time was recorded."""
    return "{:.1f}s".format(seconds) if seconds is not None else "?"


def _format_change(baseline_time, result_time) -> str:
    """Render the relative run-time change as rich markup, or ``-`` if unknown."""
    if not (result_time and baseline_time):
        return "-"

    change_pct = 100.0 * (result_time - baseline_time) / baseline_time
    # A positive change means the job took longer than the baseline, i.e. a
    # regression, so it is the case highlighted in red.
    color = "[red]" if change_pct > 0.0 else "[green]"
    return color + "{:.1f}%".format(change_pct)


def compare(baseline_file: str, result_file: str) -> None:
    """Print a table comparing the job timings stored in two result files.

    Both files are JSON objects mapping a job name to a record that may hold
    a ``time`` in seconds (the format written by the ``baseline`` command).
    One row is printed per job in the baseline file; a job that has no time
    on either side, or is missing from the result file, is shown with ``?``
    and no percentage.

    Args:
        baseline_file: Path of the JSON file holding the reference timings.
        result_file: Path of the JSON file holding the timings to evaluate.
    """
    with open(baseline_file, "r") as b:
        baseline_records = json.load(b)

    with open(result_file, "r") as r:
        result_records = json.load(r)

    # Imported here rather than at module level, so only this command needs
    # the rich package to be installed.
    from rich.console import Console
    from rich.table import Table

    table = Table(title="Performance Comparison")
    table.add_column("Job Name")
    table.add_column("Baseline")
    table.add_column("Result")
    table.add_column("Change")

    for job_name, baseline_record in baseline_records.items():
        baseline_time = baseline_record.get("time")
        # A job absent from the result file is treated like one without a time.
        result_time = result_records.get(job_name, {}).get("time")

        table.add_row(
            job_name,
            _format_seconds(baseline_time),
            _format_seconds(result_time),
            _format_change(baseline_time, result_time),
        )

    print()
    Console().print(table)
|
||||
|
||||
|
||||
def baseline(projects) -> None:
    """Run all benchmark jobs and save their timings to ``perf_check.json``.

    Projects are prepared first (cloning where needed), then every job is run
    and the collected results are written as indented JSON to
    ``perf_check.json`` in the current working directory.
    """
    prepare_projects(projects)
    timings = run_jobs(projects)

    print("Writing results to 'perf_check.json'.")
    with open("perf_check.json", "w") as out:
        out.write(json.dumps(timings, indent=4))
|
||||
|
||||
|
||||
# Command-line entry point. Supported invocations:
#   perf-check.py baseline
#   perf-check.py compare <baseline_file> <result_file>
# Guarded so that importing this module does not start running jobs.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print_usage()
    elif sys.argv[1] == "baseline":
        baseline(projects)
    elif sys.argv[1] == "compare" and len(sys.argv) >= 4:
        compare(sys.argv[2], sys.argv[3])
    else:
        # Unknown command, or "compare" without both file arguments.
        print_usage()
|
||||
Reference in New Issue
Block a user