mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-22 05:01:28 +00:00
Compare commits
14 Commits
postgres-s
...
paw/perf-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce827c658a | ||
|
|
70afe559c6 | ||
|
|
4b2a84872b | ||
|
|
aafb017d5f | ||
|
|
98746ead28 | ||
|
|
03aab3ab98 | ||
|
|
c8b911b400 | ||
|
|
72696ab176 | ||
|
|
b661ad586c | ||
|
|
8bbbf6588b | ||
|
|
b7943f3372 | ||
|
|
52456b2ff7 | ||
|
|
85f9fd7251 | ||
|
|
99ef0dd79c |
6
.changes/unreleased/Under the Hood-20231122-124612.yaml
Normal file
6
.changes/unreleased/Under the Hood-20231122-124612.yaml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
kind: Under the Hood
body: Added a GitHub action for checking performance characteristics of dbt.
time: 2023-11-22T12:46:12.16794-05:00
custom:
  Author: peterallenwebb
  Issue: "8323"
|
||||||
59
.github/workflows/perf-check.yml
vendored
Normal file
59
.github/workflows/perf-check.yml
vendored
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# **what?**
# This workflow uses a python script to check the performance of dbt against
# baselines on a set of benchmark projects.

name: Performance Check

on:
  pull_request:
  workflow_dispatch:

jobs:
  build-and-perf-check:
    name: Build dbt and check performance

    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Check out the repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'

      - name: Install Python dependencies and install dbt
        run: |
          python -m pip install --user --upgrade pip
          python -m pip --version
          make dev
          mypy --version
          dbt --version

      # Placeholder postgres profile so dbt has a target to load.
      # NOTE(review): the credentials are dummies; presumably no connection is
      # ever opened because the script only runs `dbt parse` - confirm.
      - name: Create profile
        run: |
          # -p: do not fail when the directory already exists
          mkdir -p "$HOME/.dbt"
          # Write the file in one go (overwriting, not appending) so re-runs
          # cannot produce a profile with duplicated keys.
          cat > "$HOME/.dbt/profiles.yml" <<'EOF'
          default:
            outputs:
              dev:
                dbname: dummy
                host: localhost
                password: paswd
                port: 5432
                schema: dummy
                threads: 4
                type: postgres
                user: dummy
            target: dev
          EOF
          cat "$HOME/.dbt/profiles.yml"

      - name: Run performance script
        run: |
          python ./scripts/perf-check.py baseline

      # perf_check.json is written by the script into the working directory.
      # upload-artifact v3 has been deprecated by GitHub; v4 accepts the same
      # `name` / `path` inputs.
      - name: Upload performance results
        uses: actions/upload-artifact@v4
        with:
          name: perf-check-result
          path: ./perf_check.json
|
||||||
129
scripts/perf-check.py
Normal file
129
scripts/perf-check.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
# Jobs for the in-repo "simple_models" benchmark project. run_jobs() walks
# this mapping in insertion order, so the full parse is listed before the
# follow-up plain `dbt parse`.
_SIMPLE_MODELS_JOBS = {
    "simple_models__parse_no_partial": {
        "command": ["dbt", "parse", "--no-partial-parse"],
    },
    "simple_models__second_parse": {
        "command": ["dbt", "parse"],
    },
}

# Benchmark projects keyed by project name. Each entry carries:
#   "name":    the project name,
#   "path":    directory the jobs are run in (optional; see run_jobs),
#   "git_url": repository to clone before running (optional; see
#              prepare_projects),
#   "jobs":    mapping of job name -> {"command": [argv, ...]}.
projects = {
    # example of how to use project in separate public repo, not used yet.
    # "jaffle_shop": {
    #     "name": "jaffle_shop",
    #     "git_url": "https://github.com/dbt-labs/jaffle_shop.git",
    #     "jobs": {
    #         "jaffle_shop__parse_no_partial": {
    #             "command": ["dbt", "parse", "--no-partial-parse"],
    #         },
    #     }
    # },
    "simple_models": {
        "name": "simple_models",
        "path": "./performance/projects/01_2000_simple_models",
        "jobs": _SIMPLE_MODELS_JOBS,
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def print_usage() -> None:
    """Report an invalid invocation and list the supported sub-commands.

    Printed to stdout. The first line is the original "invalid usage"
    message; the remaining lines describe the two modes the script accepts.
    """
    print("invalid usage")
    print("usage:")
    print("  perf-check.py baseline")
    print("  perf-check.py compare <baseline_file> <result_file>")
|
||||||
|
|
||||||
|
|
||||||
|
def _run_git(args, cwd=None) -> None:
    """Run ``git`` with *args*, echoing git's stderr if the command fails.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    res = subprocess.run(["git", *args], cwd=cwd, capture_output=True, text=True)
    if res.returncode != 0:
        # The exception message does not include the captured output, so show
        # git's own diagnostics before raising.
        print(res.stderr, file=sys.stderr)
    res.check_returncode()


def git_checkout(repo: str, path: pathlib.Path, commit: str = None) -> None:
    """Make sure *repo* is cloned at *path*, optionally checking out *commit*.

    The clone is skipped whenever *path* already exists; the existing
    directory is not inspected or updated.

    Args:
        repo: URL (or path) of the repository to clone.
        path: Directory to clone into / run the checkout in.
        commit: Revision to check out after cloning. Nothing is checked out
            when this is ``None`` or empty.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` or ``git checkout``
            exits with a non-zero status.
    """
    if not os.path.exists(path):
        print(f"Didn't find path {path}. Cloning {repo} into {path}.")
        _run_git(["clone", repo, str(path)])
    else:
        print(f"Found path {path}. Skipping clone of {repo}.")

    if commit:
        print(f"Checking out commit {commit} for repo {repo}")
        _run_git(["checkout", commit], cwd=path)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_projects(projects) -> None:
    """Clone every project that declares a ``"git_url"``.

    Projects without a ``"git_url"`` are left untouched.

    Args:
        projects: Mapping of project name to project configuration dict.
    """
    for project_name, project in projects.items():
        if "git_url" not in project:
            continue
        # Clone into the directory run_jobs() will later use as the working
        # directory: the configured "path" if present, else the project name.
        destination = pathlib.Path(project.get("path", project_name))
        git_checkout(project["git_url"], destination)
|
||||||
|
|
||||||
|
|
||||||
|
def run_jobs(projects) -> dict:
    """Run every job of every project and record how long each one took.

    Each job's ``"command"`` is executed as a subprocess in the project's
    ``"path"`` (or, when no path is configured, in a directory named after
    the project). Timing uses ``time.perf_counter`` around the whole
    subprocess call.

    Args:
        projects: Mapping of project name to project configuration dict.

    Returns:
        Mapping of job name to ``{"succeeded": True, "time": <seconds>}`` for
        commands that exit with status 0, or ``{"succeeded": False}``
        otherwise.
    """
    results = {}

    for project_name, project in projects.items():
        # The working directory is the same for all jobs of a project.
        cwd = project.get("path", project_name)

        for job_name, job in project["jobs"].items():
            # Flush so the message is emitted before the child process writes
            # its own output, even when stdout is buffered (e.g. piped).
            print(f"running job {job_name}", flush=True)

            start = time.perf_counter()
            res = subprocess.run(job["command"], cwd=cwd)
            end = time.perf_counter()

            if res.returncode != 0:
                results[job_name] = {"succeeded": False}
            else:
                results[job_name] = {"succeeded": True, "time": end - start}

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _format_seconds(seconds) -> str:
    """Render a duration such as ``"1.5s"``, or ``"?"`` when none was recorded."""
    return "{:.1f}s".format(seconds) if seconds is not None else "?"


def compare(baseline_file: str, result_file: str) -> None:
    """Print a table comparing job timings in two result files.

    Both files are JSON documents mapping job name to a record that may
    contain a ``"time"`` entry in seconds. One row is printed per job in the
    baseline file; a job that is missing from the result file, or that has no
    recorded time on either side, is shown with ``?`` and no percentage.

    Args:
        baseline_file: Path of the JSON file holding the baseline timings.
        result_file: Path of the JSON file holding the timings to compare.
    """
    with open(baseline_file, "r") as b:
        baseline_records = json.load(b)

    with open(result_file, "r") as r:
        result_records = json.load(r)

    # rich is only needed for rendering, so it is imported here rather than at
    # module import time.
    from rich.console import Console
    from rich.table import Table

    table = Table(title="Performance Comparison")
    for heading in ("Job Name", "Baseline", "Result", "Change"):
        table.add_column(heading)

    for job_name, baseline_record in baseline_records.items():
        baseline_time = baseline_record.get("time")
        # A job may be absent from the result file; treat it as "no time".
        result_time = result_records.get(job_name, {}).get("time")

        time_change_str = "-"
        if result_time and baseline_time:
            time_change_pct = 100.0 * (result_time - baseline_time) / baseline_time
            # A positive change means the job got slower: flag it in red.
            time_change_pfx = "[red]" if time_change_pct > 0.0 else "[green]"
            time_change_str = time_change_pfx + "{:.1f}%".format(time_change_pct)

        table.add_row(
            job_name,
            _format_seconds(baseline_time),
            _format_seconds(result_time),
            time_change_str,
        )

    print()
    Console().print(table)
|
||||||
|
|
||||||
|
|
||||||
|
def baseline(projects) -> None:
    """Run all benchmark jobs and save their timings as JSON.

    Projects are prepared first, then every job is run, and the collected
    results are written to ``perf_check.json`` in the current working
    directory, indented by four spaces.

    Args:
        projects: Mapping of project name to project configuration dict.
    """
    prepare_projects(projects)
    timings = run_jobs(projects)

    print("Writing results to 'perf_check.json'.")
    with open("perf_check.json", "w") as out_file:
        out_file.write(json.dumps(timings, indent=4))
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv) -> int:
    """Dispatch on the sub-command in *argv* and return a process exit code.

    Supported invocations:
        ``baseline``                              run the jobs and write results
        ``compare <baseline_file> <result_file>`` print a comparison table

    Args:
        argv: Full argument vector, with the script name at index 0.

    Returns:
        0 after running a sub-command, 2 when the arguments are missing,
        unrecognized, or incomplete (usage is printed in that case).
    """
    command = argv[1] if len(argv) > 1 else None

    if command == "baseline":
        baseline(projects)
        return 0

    # compare needs both file arguments; without this check a short command
    # line would fail with an IndexError instead of a usage message.
    if command == "compare" and len(argv) >= 4:
        compare(argv[2], argv[3])
        return 0

    print_usage()
    return 2


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
Reference in New Issue
Block a user