Clean up dummy credentials and make types in script semi-respectable

Fix artifact path
Actually use the append operator as intended
2025-12-19 20:01:29 +00:00 · 2023-11-27 11:48:58 -05:00 · 2023-11-22 16:34:03 -05:00 · 2023-11-22 16:30:14 -05:00 · 2023-11-22 16:25:14 -05:00 · 2023-11-22 15:21:50 -05:00
3 changed files with 194 additions and 0 deletions
--- a/Hood-20231122-124612.yaml
+++ b/Hood-20231122-124612.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Added a GitHub action for checking performance characteristics of dbt.
+time: 2023-11-22T12:46:12.16794-05:00
+custom:
+  Author: peterallenwebb
+  Issue: "8323"
--- a/.github/workflows/perf-check.yml
+++ b/.github/workflows/perf-check.yml
@@ -0,0 +1,59 @@
+# **what?**
+# This workflow uses a python script to check the performance of dbt against
+# baselines on a set of benchmark projects.
+#
+# **when?**
+# On pull requests, and on demand through a manual workflow dispatch.
+
+name: Performance Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  build-and-perf-check:
+    name: Build dbt and check performance
+
+    runs-on: ubuntu-latest
+    # Upper bound for the whole job: checkout, install and benchmark run.
+    timeout-minutes: 10
+
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.8'
+
+      # The two --version calls print the installed tool versions into the job log.
+      - name: Install Python dependencies and install dbt
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          make dev
+          mypy --version
+          dbt --version
+
+      # Builds $HOME/.dbt/profiles.yml line by line: a profile named `default`
+      # with a single postgres target `dev`. The connection values are
+      # placeholders; the jobs in scripts/perf-check.py only invoke `dbt parse`,
+      # so presumably no live database is required (NOTE(review): confirm).
+      - name: Create profile
+        run: |
+          mkdir $HOME/.dbt
+          echo "default:"               >> $HOME/.dbt/profiles.yml
+          echo "  outputs:"             >> $HOME/.dbt/profiles.yml
+          echo "    dev:"               >> $HOME/.dbt/profiles.yml
+          echo "      dbname: dummy"    >> $HOME/.dbt/profiles.yml
+          echo "      host: localhost"  >> $HOME/.dbt/profiles.yml
+          echo "      password: paswd"  >> $HOME/.dbt/profiles.yml
+          echo "      port: 5432"       >> $HOME/.dbt/profiles.yml
+          echo "      schema: dummy"    >> $HOME/.dbt/profiles.yml
+          echo "      threads: 4"       >> $HOME/.dbt/profiles.yml
+          echo "      type: postgres"   >> $HOME/.dbt/profiles.yml
+          echo "      user: dummy"      >> $HOME/.dbt/profiles.yml
+          echo "  target: dev"          >> $HOME/.dbt/profiles.yml
+          cat $HOME/.dbt/profiles.yml
+
+      # `baseline` mode times every configured job and writes the timings to
+      # perf_check.json in the working directory; it does not compare anything.
+      - name: Run performance script
+        run: |
+          python ./scripts/perf-check.py baseline
+
+      # Publish the timings file produced by the previous step.
+      - uses: actions/upload-artifact@v3
+        with:
+          name: perf-check-result
+          path: ./perf_check.json
--- a/scripts/perf-check.py
+++ b/scripts/perf-check.py
@@ -0,0 +1,129 @@
+import json
+import os
+import pathlib
+import subprocess
+import sys
+import time
+from typing import Optional, Union
+
+
+# Base argv shared by every parse job; each job copies it and adds its own flags.
+_DBT_PARSE = ["dbt", "parse"]
+
+# Benchmark projects keyed by project name. Each entry carries a "name", either
+# a local "path" or a "git_url" to clone, and "jobs" mapping a job name to the
+# "command" argv that gets timed.
+projects = {
+    # Example of a project hosted in a separate public repo (not used yet):
+    # "jaffle_shop": {
+    #     "name": "jaffle_shop",
+    #     "git_url": "https://github.com/dbt-labs/jaffle_shop.git",
+    #     "jobs": {
+    #         "jaffle_shop__parse_no_partial": {
+    #             "command": ["dbt", "parse", "--no-partial-parse"],
+    #         },
+    #     },
+    # },
+    "simple_models": {
+        "name": "simple_models",
+        "path": "./performance/projects/01_2000_simple_models",
+        "jobs": {
+            "simple_models__parse_no_partial": {
+                "command": [*_DBT_PARSE, "--no-partial-parse"],
+            },
+            "simple_models__second_parse": {
+                "command": list(_DBT_PARSE),
+            },
+        },
+    },
+}
+
+
+def print_usage() -> None:
+    """Report invalid usage and list the invocations the script accepts (on stdout)."""
+    print("invalid usage")
+    # Spell out both supported modes so the message is actionable.
+    print("usage: perf-check.py baseline")
+    print("       perf-check.py compare <baseline_file> <result_file>")
+
+
+def git_checkout(
+    repo: str, path: Union[str, pathlib.Path], commit: Optional[str] = None
+) -> None:
+    """Clone ``repo`` into ``path`` if needed, then optionally check out ``commit``.
+
+    Args:
+        repo: Repository location handed to ``git clone``.
+        path: Working-tree directory. If it already exists the clone is skipped.
+        commit: Revision passed to ``git checkout``. ``None`` (or an empty
+            string) leaves the working tree untouched.
+
+    Raises:
+        subprocess.CalledProcessError: if a git command exits with a non-zero status.
+    """
+    if not os.path.exists(path):
+        print(f"Didn't find path {path}. Cloning {repo} into {path}.")
+        _run_git(["clone", repo, path])
+    else:
+        print(f"Found path {path}. Skipping clone of {repo}.")
+
+    if commit:
+        print(f"Checking out commit {commit} for repo {repo}")
+        _run_git(["checkout", commit], cwd=path)
+
+
+def _run_git(args, cwd=None) -> None:
+    """Run ``git`` with ``args``; on failure echo its stderr, then raise."""
+    res = subprocess.run(["git", *args], cwd=cwd, capture_output=True)
+    if res.returncode != 0:
+        # Output is captured, so git's own error text would otherwise be lost.
+        print(res.stderr.decode(errors="replace"), file=sys.stderr)
+    res.check_returncode()
+
+
+def prepare_projects(projects) -> None:
+    """Make sure every project that declares a ``git_url`` is checked out.
+
+    Such a project is cloned into a directory named after its key in
+    ``projects``; entries without a ``git_url`` are left alone.
+    """
+    remote_projects = (
+        (name, spec["git_url"]) for name, spec in projects.items() if "git_url" in spec
+    )
+    for name, git_url in remote_projects:
+        git_checkout(git_url, name)
+
+
+def run_jobs(projects):
+    """Run every job of every project and record how long it took.
+
+    Each job's ``command`` is executed in the project's ``path`` when one is
+    given, otherwise in a directory named after the project key.
+
+    Returns:
+        A dict keyed by job name. A command that exits with status 0 maps to
+        ``{"succeeded": True, "time": <elapsed seconds>}``; any other exit
+        status maps to ``{"succeeded": False}``.
+    """
+    timings = {}
+
+    for project_name, project in projects.items():
+        for job_name, job in project["jobs"].items():
+            print(f"running job {job_name}")
+            workdir = project.get("path", project_name)
+            started = time.perf_counter()
+            proc = subprocess.run(job["command"], cwd=workdir)
+            elapsed = time.perf_counter() - started
+            if proc.returncode == 0:
+                timings[job_name] = {"succeeded": True, "time": elapsed}
+            else:
+                timings[job_name] = {"succeeded": False}
+
+    return timings
+
+
+def compare(baseline_file: str, result_file: str) -> None:
+    """Print a table comparing per-job timings from two result files.
+
+    Both files are JSON objects mapping a job name to a record that may hold
+    a ``time`` entry in seconds. One row is printed per job in the baseline
+    file.
+
+    Args:
+        baseline_file: Path of the JSON file holding the reference timings.
+        result_file: Path of the JSON file holding the timings to compare.
+    """
+    with open(baseline_file, "r") as b:
+        baseline = json.load(b)
+
+    with open(result_file, "r") as r:
+        result = json.load(r)
+
+    # Imported here rather than at module level, so only this mode needs rich.
+    from rich.console import Console
+    from rich.table import Table
+
+    table = Table(title="Performance Comparison")
+    for heading in ("Job Name", "Baseline", "Result", "Change"):
+        table.add_column(heading)
+
+    for job_name, baseline_record in baseline.items():
+        # A failed job has no "time" entry; it is shown as "?".
+        baseline_time = baseline_record.get("time")
+        baseline_time_str = (
+            "{:.1f}s".format(baseline_time) if baseline_time is not None else "?"
+        )
+
+        # A job absent from the result file is also shown as "?" instead of
+        # aborting the whole comparison with a KeyError.
+        result_time = result.get(job_name, {}).get("time")
+        result_time_str = (
+            "{:.1f}s".format(result_time) if result_time is not None else "?"
+        )
+
+        time_change_str = "-"
+        if result_time and baseline_time:
+            time_change_pct = 100.0 * (result_time - baseline_time) / baseline_time
+            # A positive change means the job got slower, so it is flagged red.
+            time_change_pfx = "[red]" if time_change_pct > 0.0 else "[green]"
+            time_change_str = time_change_pfx + "{:.1f}%".format(time_change_pct)
+
+        table.add_row(job_name, baseline_time_str, result_time_str, time_change_str)
+
+    print()
+    Console().print(table)
+
+
+def baseline(projects) -> None:
+    """Prepare the projects, run all of their jobs and save the timings.
+
+    The results are written as 4-space-indented JSON to ``perf_check.json``
+    in the current working directory.
+    """
+    prepare_projects(projects)
+    timings = run_jobs(projects)
+
+    print("Writing results to 'perf_check.json'.")
+    serialized = json.dumps(timings, indent=4)
+    with open("perf_check.json", "w") as out_file:
+        out_file.write(serialized)
+
+
+def main(argv) -> int:
+    """Dispatch on the mode named in ``argv[1]`` and return an exit status.
+
+    ``baseline`` times the configured projects. ``compare`` needs two further
+    arguments, a baseline file and a result file. Anything else, including a
+    ``compare`` without both files, prints the usage text and returns 2.
+    """
+    if len(argv) >= 2 and argv[1] == "baseline":
+        baseline(projects)
+        return 0
+    # Require both file arguments up front instead of failing with IndexError.
+    if len(argv) >= 4 and argv[1] == "compare":
+        compare(argv[2], argv[3])
+        return 0
+    print_usage()
+    return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
Author	SHA1	Message	Date
Peter Allen Webb	ce827c658a	Clean up dummy credentials and make types in script semi-respectable	2023-11-27 11:48:58 -05:00
Peter Allen Webb	70afe559c6	Fix artifact path	2023-11-22 16:34:03 -05:00
Peter Allen Webb	4b2a84872b	Actually use the append operator as intended	2023-11-22 16:30:14 -05:00
Peter Allen Webb	aafb017d5f	Show profile contents	2023-11-22 16:25:14 -05:00
Peter Allen Webb	98746ead28	Script bug fixes	2023-11-22 15:21:50 -05:00
Peter Allen Webb	03aab3ab98	Fix script parameter	2023-11-22 15:16:26 -05:00
Peter Allen Webb	c8b911b400	Add parameter to script	2023-11-22 15:08:33 -05:00
Peter Allen Webb	72696ab176	Add mkdir so dir exists	2023-11-22 14:08:49 -05:00
Peter Allen Webb	b661ad586c	Fix indentation	2023-11-22 12:50:00 -05:00
Peter Allen Webb	8bbbf6588b	Modify on clause	2023-11-22 12:47:51 -05:00
Peter Allen Webb	b7943f3372	Add changelog entry	2023-11-22 12:47:07 -05:00
Peter Allen Webb	52456b2ff7	Add more options to trigger workflow	2023-11-22 12:32:36 -05:00
Peter Allen Webb	85f9fd7251	Add on clause.	2023-11-22 12:24:57 -05:00
Peter Allen Webb	99ef0dd79c	Add a GitHub action for performance checks.	2023-11-22 12:11:40 -05:00