Compare commits

...

29 Commits

Author SHA1 Message Date
MichelleArk
cf4384da38 uncomment dbt-postgres-testing 2025-12-12 17:03:06 -05:00
Michelle Ark
71a6e53102 delete deprecated create_adapter_plugins script (#12280) 2025-12-12 17:01:18 -05:00
Michelle Ark
c4dc80dcd2 delete duplicate adapters tests (#12275) 2025-12-12 16:53:04 -05:00
Michelle Ark
8097a34726 remove snowplow telemetry schemas for < 0.11.0 versions (#12279) 2025-12-12 14:54:40 -05:00
MichelleArk
b66dff7278 move custom-hooks to scripts 2025-12-12 14:24:07 -05:00
Michelle Ark
22d21edb4b Reorganize docs/arch (#12270) 2025-12-12 10:26:23 -05:00
Michelle Ark
bef7928e22 remove unused performance CI framework (#12278) 2025-12-12 10:26:06 -05:00
Michelle Ark
c573131d91 cleanup stale test migration docs (#12274) 2025-12-11 12:19:35 -05:00
Michelle Ark
f10d84d05e move setup_db.sh to scripts, remove test dir (#12273) 2025-12-11 12:19:22 -05:00
Michelle Ark
79a4c8969e improve error message clarity when detecting nodes with spaces in name (#12272) 2025-12-11 12:15:06 -05:00
Gerda Shank
9a80308fcf Implementation of meta_get and meta_require (#12267) 2025-12-10 22:57:28 -05:00
Quigley Malcolm
7a13d08376 Ensure all recent deprecation warnings include the name in the message (#12265)
* Add event name to `message` of recently added deprecations

* Make it harder to not supply the event name to deprecation messages

* Add changie doc

* Fixup import naming
2025-12-10 13:03:24 -06:00
Colin Rogers
9e9f5b8e57 add add_catalog_integration call even if we have a pre-existing manifest (#12262)
* add add_catalog_integration call even if we have a pre-existing manifest

* add changelog
2025-12-10 09:35:39 -08:00
Michelle Ark
9cd6a23eba add compile test for batch context vars (#12261) 2025-12-09 12:08:43 -08:00
Emily Rockman
e46c37cf07 fix target file for dbt-common CI (#12258) 2025-12-08 17:15:24 -05:00
Michelle Ark
df23f398a6 set unit test config.enabled to False if it is testing a disabled model (#12251) 2025-12-08 13:27:41 -08:00
Emily Rockman
97df9278c0 Move to hatch for build tooling (#12192)
* initial hatch implementation

* cleanup docs

* replacing makefile

* cleanup hatch commands to match adapters

reorganize more to match adapters setup

script comment

dont pip install

fix test commands

* changelog

improve changelog

* CI fix

* fix for env

* use a standard version file

* remove odd license logic

* fix bumpversion

* remove sha input

* more cleanup

* fix legacy build path

* define version for pyproject.toml

* use hatch hook for license

* remove tox

* ensure tests are split

* remove temp file for testing

* explicitly match old version in pyproject.toml

* fix up testing

* get rid of bumpversion

* put dev_dependencies.txt in hatch

* setup.py is now dead

* set python version for local dev

* local dev fixes

* temp script to compare wheels

* parity with existing wheel builds

* Revert "temp script to compare wheels"

This reverts commit c31417a092.

* fix docker test file
2025-12-05 21:59:44 -05:00
Edgar Ramírez Mondragón
748d352b6b Address Click 8.2+ deprecation warning by using type-checking imports (#12039) 2025-12-05 13:13:25 -08:00
Michelle Ark
bbd8fa02f1 fix flaky invocation context + warn error settings in parser unit tests (#12256) 2025-12-05 10:19:08 -05:00
Emily Rockman
61009f6ba7 Tweak release for unused fields (#12209)
* point to branch:

* remove unused code paths

* make release backwards compatible

hardcode

* use correct types

* put main back
2025-12-04 09:11:59 -05:00
Emily Rockman
ee7ecdc29f Improve --add-package duplicate detection (#12239)
* optimize name matches

* changelog

* Apply suggestion from @emmyoop
2025-12-03 12:49:57 -05:00
Matt Burke
d74b58a137 Fix partial parsing bug with singular tests (#12224) 2025-12-02 14:30:47 -05:00
Michelle Ark
12b04e7d2f avoid raising custom-key-in-config-deprecation for pre/post-hook model SQL config validation (#12244) 2025-12-02 14:22:02 -05:00
Michelle Ark
5d56a052a7 Turn on jsonschema-based deprecations by default, based on adapter support (#12240) 2025-12-02 12:37:37 -05:00
Emily Rockman
62a8ea05a6 stop excluding the core team from changelogs (#12241) 2025-12-02 09:38:24 -05:00
Emily Rockman
1219bd49aa Merge pull request #12238 from dbt-labs/revert-merge
Revert merge on main
2025-12-01 14:05:11 -05:00
Emily Rockman
791d1ebdcd Revert "changelog"
This reverts commit 8ff86d35ea.
2025-12-01 13:27:03 -05:00
Emily Rockman
148b9b41a5 Revert "optimize name matches"
This reverts commit 087f8167ec.
2025-12-01 13:27:02 -05:00
Emily Rockman
d096a6776e Revert "deal with bool"
This reverts commit bcb07ceb7b.
2025-12-01 13:26:58 -05:00
4280 changed files with 1121 additions and 71425 deletions

View File

@@ -1,37 +0,0 @@
[bumpversion]
current_version = 1.12.0a1
parse = (?P<major>[\d]+) # major version number
\.(?P<minor>[\d]+) # minor version number
\.(?P<patch>[\d]+) # patch version number
(?P<prerelease> # optional pre-release - ex: a1, b2, rc25
(?P<prekind>a|b|rc) # pre-release type
(?P<num>[\d]+) # pre-release version number
)?
( # optional nightly release indicator
\.(?P<nightly>dev[0-9]+) # ex: .dev02142023
)? # expected matches: `1.15.0`, `1.5.0a11`, `1.5.0a1.dev123`, `1.5.0.dev123457`, expected failures: `1`, `1.5`, `1.5.2-a1`, `text1.5.0`
serialize =
{major}.{minor}.{patch}{prekind}{num}.{nightly}
{major}.{minor}.{patch}.{nightly}
{major}.{minor}.{patch}{prekind}{num}
{major}.{minor}.{patch}
commit = False
tag = False
[bumpversion:part:prekind]
first_value = a
optional_value = final
values =
a
b
rc
final
[bumpversion:part:num]
first_value = 1
[bumpversion:part:nightly]
[bumpversion:file:core/pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

View File

@@ -0,0 +1,6 @@
kind: Features
body: Raise jsonschema-based deprecation warnings by default
time: 2025-12-01T16:52:09.354436-05:00
custom:
Author: michelleark
Issue: 12240

View File

@@ -0,0 +1,6 @@
kind: Features
body: ':bug: :snowman: Disable unit tests whose model is disabled'
time: 2025-12-03T12:29:26.209248-05:00
custom:
Author: michelleark
Issue: "10540"

View File

@@ -0,0 +1,6 @@
kind: Features
body: Implement config.meta_get and config.meta_require
time: 2025-12-10T20:20:01.354288-05:00
custom:
Author: gshank
Issue: "12012"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Address Click 8.2+ deprecation warning
time: 2025-09-22T15:17:26.983151-06:00
custom:
Author: edgarrmondragon
Issue: "12038"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Fix bug in partial parsing when updating a model with a schema file that is referenced by a singular test
time: 2025-11-28T10:21:29.911147Z
custom:
Author: mattogburke
Issue: "12223"

View File

@@ -4,4 +4,4 @@ body: ':bug: :snowman: Improve `dbt deps --add-package` duplicate detection with
time: 2025-11-28T16:31:44.344099-05:00
custom:
Author: emmyoop
Issue: "12234"
Issue: "12239"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: ':bug: :snowman: Fix false positive deprecation warning of pre/post-hook SQL configs'
time: 2025-12-02T13:37:05.012112-05:00
custom:
Author: michelleark
Issue: "12244"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Ensure recent deprecation warnings include event name in message
time: 2025-12-09T17:50:31.334618-06:00
custom:
Author: QMalcolm
Issue: "12264"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Improve error message clarity when detecting nodes with space in name
time: 2025-12-10T14:39:35.107841-08:00
custom:
Author: michelleark
Issue: "11835"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Replace setuptools and tox with hatch for build, test, and environment management.
time: 2025-11-21T14:05:15.838252-05:00
custom:
Author: emmyoop
Issue: "12151"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Add add_catalog_integration call even if we have a pre-existing manifest
time: 2025-12-09T13:18:57.043254-08:00
custom:
Author: colin-rogers-dbt
Issue: "12262"

View File

@@ -41,32 +41,26 @@ newlines:
endOfVersion: 1
custom:
- key: Author
label: GitHub Username(s) (separated by a single space if multiple)
type: string
minLength: 3
- key: Issue
label: GitHub Issue Number (separated by a single space if multiple)
type: string
minLength: 1
- key: Author
label: GitHub Username(s) (separated by a single space if multiple)
type: string
minLength: 3
- key: Issue
label: GitHub Issue Number (separated by a single space if multiple)
type: string
minLength: 1
footerFormat: |
{{- $contributorDict := dict }}
{{- /* ensure all names in this list are all lowercase for later matching purposes */}}
{{- $core_team := splitList " " .Env.CORE_TEAM }}
{{- /* ensure we always skip snyk and dependabot in addition to the core team */}}
{{- $maintainers := list "dependabot[bot]" "snyk-bot"}}
{{- range $team_member := $core_team }}
{{- $team_member_lower := lower $team_member }}
{{- $maintainers = append $maintainers $team_member_lower }}
{{- end }}
{{- /* ensure we always skip snyk and dependabot */}}
{{- $bots := list "dependabot[bot]" "snyk-bot"}}
{{- range $change := .Changes }}
{{- $authorList := splitList " " $change.Custom.Author }}
{{- /* loop through all authors for a single changelog */}}
{{- range $author := $authorList }}
{{- $authorLower := lower $author }}
{{- /* we only want to include non-core team contributors */}}
{{- if not (has $authorLower $maintainers)}}
{{- /* we only want to include non-bot contributors */}}
{{- if not (has $authorLower $bots)}}
{{- $changeList := splitList " " $change.Custom.Author }}
{{- $IssueList := list }}
{{- $changeLink := $change.Kind }}

View File

@@ -10,6 +10,5 @@ ignore =
E704 # makes Flake8 work like black
E741
E501 # long line checking is done in black
exclude = test/
per-file-ignores =
*/__init__.py: F401

View File

@@ -1 +1 @@
../../../test/setup_db.sh
../../../scripts/setup_db.sh

169
.github/dbt-postgres-testing.yml vendored Normal file
View File

@@ -0,0 +1,169 @@
# **what?**
# Runs all tests in dbt-postgres with this branch of dbt-core to ensure nothing is broken
# **why?**
# Ensure dbt-core changes do not break dbt-postgres, as a basic proxy for other adapters
# **when?**
# This will run when trying to merge a PR into main.
# It can also be manually triggered.
# This workflow can be skipped by adding the "Skip Postgres Testing" label to the PR. This is
# useful when making a change in both `dbt-postgres` and `dbt-core` where the changes are dependent
# and cause the other repository to break.
name: "dbt-postgres Tests"
run-name: >-
${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call')
&& format('dbt-postgres@{0} with dbt-core@{1}', inputs.dbt-postgres-ref, inputs.dbt-core-ref)
|| 'dbt-postgres@main with dbt-core branch' }}
on:
push:
branches:
- "main"
- "*.latest"
- "releases/*"
pull_request:
merge_group:
types: [checks_requested]
workflow_dispatch:
inputs:
dbt-postgres-ref:
description: "The branch of dbt-postgres to test against"
default: "main"
dbt-core-ref:
description: "The branch of dbt-core to test against"
default: "main"
workflow_call:
inputs:
dbt-postgres-ref:
description: "The branch of dbt-postgres to test against"
type: string
required: true
default: "main"
dbt-core-ref:
description: "The branch of dbt-core to test against"
type: string
required: true
default: "main"
permissions: read-all
# will cancel previous workflows triggered by the same event
# and for the same ref for PRs/merges or same SHA otherwise
# and for the same inputs on workflow_dispatch or workflow_call
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(fromJson('["pull_request", "merge_group"]'), github.event_name) && github.event.pull_request.head.ref || github.sha }}-${{ contains(fromJson('["workflow_call", "workflow_dispatch"]'), github.event_name) && github.event.inputs.dbt-postgres-ref && github.event.inputs.dbt-core-ref || github.sha }}
cancel-in-progress: true
defaults:
run:
shell: bash
jobs:
job-prep:
# This allows us to run the workflow on pull_requests as well so we can always run unit tests
# and only run integration tests on merge for time purposes
name: Setup Repo Refs
runs-on: ubuntu-latest
outputs:
dbt-postgres-ref: ${{ steps.core-ref.outputs.ref }}
dbt-core-ref: ${{ steps.common-ref.outputs.ref }}
steps:
- name: "Input Refs"
id: job-inputs
run: |
echo "inputs.dbt-postgres-ref=${{ inputs.dbt-postgres-ref }}"
echo "inputs.dbt-core-ref=${{ inputs.dbt-core-ref }}"
- name: "Determine dbt-postgres ref"
id: core-ref
run: |
if [[ -z "${{ inputs.dbt-postgres-ref }}" ]]; then
REF="main"
else
REF=${{ inputs.dbt-postgres-ref }}
fi
echo "ref=$REF" >> $GITHUB_OUTPUT
- name: "Determine dbt-core ref"
id: common-ref
run: |
if [[ -z "${{ inputs.dbt-core-ref }}" ]]; then
# these will be commits instead of branches
if [[ "${{ github.event_name }}" == "merge_group" ]]; then
REF=${{ github.event.merge_group.head_sha }}
else
REF=${{ github.event.pull_request.base.sha }}
fi
else
REF=${{ inputs.dbt-core-ref }}
fi
echo "ref=$REF" >> $GITHUB_OUTPUT
- name: "Final Refs"
run: |
echo "dbt-postgres-ref=${{ steps.core-ref.outputs.ref }}"
echo "dbt-core-ref=${{ steps.common-ref.outputs.ref }}"
integration-tests-postgres:
name: "dbt-postgres integration tests"
needs: [job-prep]
runs-on: ubuntu-latest
defaults:
run:
working-directory: "./dbt-postgres"
environment:
name: "dbt-postgres"
env:
POSTGRES_TEST_HOST: ${{ vars.POSTGRES_TEST_HOST }}
POSTGRES_TEST_PORT: ${{ vars.POSTGRES_TEST_PORT }}
POSTGRES_TEST_USER: ${{ vars.POSTGRES_TEST_USER }}
POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }}
POSTGRES_TEST_DATABASE: ${{ vars.POSTGRES_TEST_DATABASE }}
POSTGRES_TEST_THREADS: ${{ vars.POSTGRES_TEST_THREADS }}
services:
postgres:
image: postgres
env:
POSTGRES_PASSWORD: postgres
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- ${{ vars.POSTGRES_TEST_PORT }}:5432
steps:
- name: "Check out dbt-adapters@${{ needs.job-prep.outputs.dbt-postgres-ref }}"
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
repository: dbt-labs/dbt-adapters
ref: ${{ needs.job-prep.outputs.dbt-postgres-ref }}
- name: "Set up Python"
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # actions/setup-python@v5
with:
python-version: ${{ inputs.python-version }}
- name: "Set environment variables"
run: |
echo "HATCH_PYTHON=${{ inputs.python-version }}" >> $GITHUB_ENV
echo "PIP_ONLY_BINARY=psycopg2-binary" >> $GITHUB_ENV
- name: "Setup test database"
run: psql -f ./scripts/setup_test_database.sql
env:
PGHOST: ${{ vars.POSTGRES_TEST_HOST }}
PGPORT: ${{ vars.POSTGRES_TEST_PORT }}
PGUSER: postgres
PGPASSWORD: postgres
PGDATABASE: postgres
- name: "Install hatch"
uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # pypa/hatch@install
- name: "Run integration tests"
run: hatch run ${{ inputs.hatch-env }}:integration-tests

View File

@@ -200,16 +200,15 @@ jobs:
- name: "Install Python Dependencies"
run: |
python -m venv env
source env/bin/activate
python -m pip install --upgrade pip
python -m pip install hatch
- name: "Bump Version To ${{ needs.cleanup_changelog.outputs.next-version }}"
run: |
source env/bin/activate
python -m pip install -r dev-requirements.txt
env/bin/bumpversion --allow-dirty --new-version ${{ needs.cleanup_changelog.outputs.next-version }} major
git status
cd core
hatch version ${{ needs.cleanup_changelog.outputs.next-version }}
hatch run dev-req
dbt --version
- name: "Commit Version Bump to Branch"
run: |
@@ -250,13 +249,13 @@ jobs:
- name: "Cleanup - Remove Trailing Whitespace Via Pre-commit"
continue-on-error: true
run: |
pre-commit run trailing-whitespace --files .bumpversion.cfg CHANGELOG.md .changes/* || true
pre-commit run trailing-whitespace --files CHANGELOG.md .changes/* || true
# this step will fail on newline errors but also correct them
- name: "Cleanup - Remove Extra Newlines Via Pre-commit"
continue-on-error: true
run: |
pre-commit run end-of-file-fixer --files .bumpversion.cfg CHANGELOG.md .changes/* || true
pre-commit run end-of-file-fixer --files CHANGELOG.md .changes/* || true
- name: "Commit Version Bump to Branch"
run: |

View File

@@ -60,16 +60,22 @@ jobs:
run: |
python -m pip install --user --upgrade pip
python -m pip --version
make dev
make dev_req
mypy --version
dbt --version
python -m pip install hatch
cd core
hatch run setup
- name: Verify dbt installation
run: |
cd core
hatch run dbt --version
- name: Run pre-commit hooks
run: pre-commit run --all-files --show-diff-on-failure
run: |
cd core
hatch run code-quality
unit:
name: unit test / python ${{ matrix.python-version }}
name: "unit test / python ${{ matrix.python-version }}"
runs-on: ubuntu-latest
timeout-minutes: 10
@@ -79,9 +85,6 @@ jobs:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
env:
TOXENV: "unit"
steps:
- name: Check out the repository
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
@@ -95,15 +98,15 @@ jobs:
run: |
python -m pip install --user --upgrade pip
python -m pip --version
python -m pip install tox
tox --version
python -m pip install hatch
hatch --version
- name: Run unit tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # nick-fields/retry@v3
with:
timeout_minutes: 10
max_attempts: 3
command: tox -e unit
command: cd core && hatch run ci:unit-tests
- name: Get current date
if: always()
@@ -156,7 +159,7 @@ jobs:
echo "include=${INCLUDE_GROUPS}" >> $GITHUB_OUTPUT
integration-postgres:
name: (${{ matrix.split-group }}) integration test / python ${{ matrix.python-version }} / ${{ matrix.os }}
name: "(${{ matrix.split-group }}) integration test / python ${{ matrix.python-version }} / ${{ matrix.os }}"
runs-on: ${{ matrix.os }}
timeout-minutes: 30
@@ -169,7 +172,6 @@ jobs:
os: ["ubuntu-latest"]
split-group: ${{ fromJson(needs.integration-metadata.outputs.split-groups) }}
env:
TOXENV: integration
DBT_INVOCATION_ENV: github-actions
DBT_TEST_USER_1: dbt_test_user_1
DBT_TEST_USER_2: dbt_test_user_2
@@ -209,7 +211,7 @@ jobs:
- name: Run postgres setup script
run: |
./test/setup_db.sh
./scripts/setup_db.sh
env:
PGHOST: localhost
PGPORT: 5432
@@ -219,17 +221,16 @@ jobs:
run: |
python -m pip install --user --upgrade pip
python -m pip --version
python -m pip install tox
tox --version
python -m pip install hatch
hatch --version
- name: Run integration tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
command: tox -- --ddtrace
env:
PYTEST_ADDOPTS: ${{ format('--splits {0} --group {1}', env.PYTHON_INTEGRATION_TEST_WORKERS, matrix.split-group) }}
shell: bash
command: cd core && hatch run ci:integration-tests -- --ddtrace --splits ${{ env.PYTHON_INTEGRATION_TEST_WORKERS }} --group ${{ matrix.split-group }}
- name: Get current date
if: always()
@@ -265,7 +266,6 @@ jobs:
# already includes split group and runs mac + windows
include: ${{ fromJson(needs.integration-metadata.outputs.include) }}
env:
TOXENV: integration
DBT_INVOCATION_ENV: github-actions
DBT_TEST_USER_1: dbt_test_user_1
DBT_TEST_USER_2: dbt_test_user_2
@@ -292,7 +292,7 @@ jobs:
with:
timeout_minutes: 10
max_attempts: 3
command: ./test/setup_db.sh
command: ./scripts/setup_db.sh
- name: Set up postgres (windows)
if: runner.os == 'Windows'
@@ -302,17 +302,16 @@ jobs:
run: |
python -m pip install --user --upgrade pip
python -m pip --version
python -m pip install tox
tox --version
python -m pip install hatch
hatch --version
- name: Run integration tests
uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
command: tox -- --ddtrace
env:
PYTEST_ADDOPTS: ${{ format('--splits {0} --group {1}', env.PYTHON_INTEGRATION_TEST_WORKERS, matrix.split-group) }}
shell: bash
command: cd core && hatch run ci:integration-tests -- --ddtrace --splits ${{ env.PYTHON_INTEGRATION_TEST_WORKERS }} --group ${{ matrix.split-group }}
- name: Get current date
if: always()
@@ -369,7 +368,7 @@ jobs:
- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
python -m pip install --upgrade setuptools wheel twine check-wheel-contents
python -m pip install --upgrade hatch twine check-wheel-contents
python -m pip --version
- name: Build distributions
@@ -378,27 +377,7 @@ jobs:
- name: Show distributions
run: ls -lh dist/
- name: Check distribution descriptions
- name: Check and verify distributions
run: |
twine check dist/*
- name: Check wheel contents
run: |
check-wheel-contents dist/*.whl --ignore W007,W008
- name: Install wheel distributions
run: |
find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
- name: Check wheel distributions
run: |
dbt --version
- name: Install source distributions
# ignore dbt-1.0.0, which intentionally raises an error when installed from source
run: |
find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |
dbt --version
cd core
hatch run build:check-all

View File

@@ -1,265 +0,0 @@
# **what?**
# This workflow models the performance characteristics of a point in time in dbt.
# It runs specific dbt commands on committed projects multiple times to create and
# commit information about the distribution to the current branch. For more information
# see the readme in the performance module at /performance/README.md.
#
# **why?**
# When developing new features, we can take quick performance samples and compare
# them against the committed baseline measurements produced by this workflow to detect
# some performance regressions at development time before they reach users.
#
# **when?**
# This is only run once directly after each release (for non-prereleases). If for some
# reason the results of a run are not satisfactory, it can also be triggered manually.
name: Model Performance Characteristics
on:
# runs after non-prereleases are published.
release:
types: [released]
# run manually from the actions tab
workflow_dispatch:
inputs:
release_id:
description: 'dbt version to model (must be non-prerelease in Pypi)'
type: string
required: true
env:
RUNNER_CACHE_PATH: performance/runner/target/release/runner
# both jobs need to write
permissions:
contents: write
pull-requests: write
jobs:
set-variables:
name: Setting Variables
runs-on: ${{ vars.UBUNTU_LATEST }}
outputs:
cache_key: ${{ steps.variables.outputs.cache_key }}
release_id: ${{ steps.semver.outputs.base-version }}
release_branch: ${{ steps.variables.outputs.release_branch }}
steps:
# explicitly checkout the performance runner from main regardless of which
# version we are modeling.
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
ref: main
- name: Parse version into parts
id: semver
uses: dbt-labs/actions/parse-semver@v1
with:
version: ${{ github.event.inputs.release_id || github.event.release.tag_name }}
# collect all the variables that need to be used in subsequent jobs
- name: Set variables
id: variables
run: |
# create a cache key that will be used in the next job. without this the
# next job would have to checkout from main and hash the files itself.
echo "cache_key=${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml')}}-${{ hashFiles('performance/runner/src/*') }}" >> $GITHUB_OUTPUT
branch_name="${{steps.semver.outputs.major}}.${{steps.semver.outputs.minor}}.latest"
echo "release_branch=$branch_name" >> $GITHUB_OUTPUT
echo "release branch is inferred to be ${branch_name}"
latest-runner:
name: Build or Fetch Runner
runs-on: ${{ vars.UBUNTU_LATEST }}
needs: [set-variables]
env:
RUSTFLAGS: "-D warnings"
steps:
- name: '[DEBUG] print variables'
run: |
echo "all variables defined in set-variables"
echo "cache_key: ${{ needs.set-variables.outputs.cache_key }}"
echo "release_id: ${{ needs.set-variables.outputs.release_id }}"
echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"
# explicitly checkout the performance runner from main regardless of which
# version we are modeling.
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
ref: main
# attempts to access a previously cached runner
- uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # actions/cache@v4
id: cache
with:
path: ${{ env.RUNNER_CACHE_PATH }}
key: ${{ needs.set-variables.outputs.cache_key }}
- name: Fetch Rust Toolchain
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Add fmt
if: steps.cache.outputs.cache-hit != 'true'
run: rustup component add rustfmt
- name: Cargo fmt
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # actions-rs/cargo@v1
with:
command: fmt
args: --manifest-path performance/runner/Cargo.toml --all -- --check
- name: Test
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # actions-rs/cargo@v1
with:
command: test
args: --manifest-path performance/runner/Cargo.toml
- name: Build (optimized)
if: steps.cache.outputs.cache-hit != 'true'
uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # actions-rs/cargo@v1
with:
command: build
args: --release --manifest-path performance/runner/Cargo.toml
# the cache action automatically caches this binary at the end of the job
model:
# depends on `latest-runner` as a separate job so that failures in this job do not prevent
# a successfully tested and built binary from being cached.
needs: [set-variables, latest-runner]
name: Model a release
runs-on: ${{ vars.UBUNTU_LATEST }}
steps:
- name: '[DEBUG] print variables'
run: |
echo "all variables defined in set-variables"
echo "cache_key: ${{ needs.set-variables.outputs.cache_key }}"
echo "release_id: ${{ needs.set-variables.outputs.release_id }}"
echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"
- name: Setup Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # actions/setup-python@v6
with:
python-version: "3.10"
- name: Install dbt
run: pip install dbt-postgres==${{ needs.set-variables.outputs.release_id }}
- name: Install Hyperfine
run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb
# explicitly checkout main to get the latest project definitions
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
ref: main
# this was built in the previous job so it will be there.
- name: Fetch Runner
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # actions/cache@v4
id: cache
with:
path: ${{ env.RUNNER_CACHE_PATH }}
key: ${{ needs.set-variables.outputs.cache_key }}
- name: Move Runner
run: mv performance/runner/target/release/runner performance/app
- name: Change Runner Permissions
run: chmod +x ./performance/app
- name: '[DEBUG] ls baseline directory before run'
run: ls -R performance/baselines/
# `${{ github.workspace }}` is used to pass the absolute path
- name: Create directories
run: |
mkdir ${{ github.workspace }}/performance/tmp/
mkdir -p performance/baselines/${{ needs.set-variables.outputs.release_id }}/
# Run modeling, taking 20 samples
- name: Run Measurement
run: |
performance/app model -v ${{ needs.set-variables.outputs.release_id }} -b ${{ github.workspace }}/performance/baselines/ -p ${{ github.workspace }}/performance/projects/ -t ${{ github.workspace }}/performance/tmp/ -n 20
- name: '[DEBUG] ls baseline directory after run'
run: ls -R performance/baselines/
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # actions/upload-artifact@v4
with:
name: baseline
path: performance/baselines/${{ needs.set-variables.outputs.release_id }}/
create-pr:
name: Open PR for ${{ matrix.base-branch }}
# depends on `model` as a separate job so that the baseline can be committed to more than one branch
# i.e. release branch and main
needs: [set-variables, latest-runner, model]
runs-on: ${{ vars.UBUNTU_LATEST }}
strategy:
matrix:
include:
- base-branch: refs/heads/main
target-branch: performance-bot/main_${{ needs.set-variables.outputs.release_id }}_${{GITHUB.RUN_ID}}
- base-branch: refs/heads/${{ needs.set-variables.outputs.release_branch }}
target-branch: performance-bot/release_${{ needs.set-variables.outputs.release_id }}_${{GITHUB.RUN_ID}}
steps:
- name: '[DEBUG] print variables'
run: |
echo "all variables defined in set-variables"
echo "cache_key: ${{ needs.set-variables.outputs.cache_key }}"
echo "release_id: ${{ needs.set-variables.outputs.release_id }}"
echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"
- name: Checkout
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
ref: ${{ matrix.base-branch }}
- name: Create PR branch
run: |
git checkout -b ${{ matrix.target-branch }}
git push origin ${{ matrix.target-branch }}
git branch --set-upstream-to=origin/${{ matrix.target-branch }} ${{ matrix.target-branch }}
- uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # actions/download-artifact@v4
with:
name: baseline
path: performance/baselines/${{ needs.set-variables.outputs.release_id }}
- name: '[DEBUG] ls baselines after artifact download'
run: ls -R performance/baselines/
- name: Commit baseline
uses: EndBug/add-and-commit@a94899bca583c204427a224a7af87c02f9b325d5 # EndBug/add-and-commit@v9
with:
add: 'performance/baselines/*'
author_name: 'Github Build Bot'
author_email: 'buildbot@fishtownanalytics.com'
message: 'adding performance baseline for ${{ needs.set-variables.outputs.release_id }}'
push: 'origin origin/${{ matrix.target-branch }}'
- name: Create Pull Request
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # peter-evans/create-pull-request@v7
with:
author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
base: ${{ matrix.base-branch }}
branch: '${{ matrix.target-branch }}'
title: 'Adding performance modeling for ${{needs.set-variables.outputs.release_id}} to ${{ matrix.base-branch }}'
body: 'Committing perf results for tracking for the ${{needs.set-variables.outputs.release_id}}'
labels: |
Skip Changelog
Performance

View File

@@ -46,7 +46,7 @@ jobs:
- name: "Get Current Version Number"
id: version-number-sources
run: |
current_version=`awk -F"current_version = " '{print $2}' .bumpversion.cfg | tr '\n' ' '`
current_version=$(grep '^version = ' core/dbt/__version__.py | sed 's/version = "\(.*\)"/\1/')
echo "current_version=$current_version" >> $GITHUB_OUTPUT
- name: "Audit Version And Parse Into Parts"

View File

@@ -72,12 +72,15 @@ defaults:
run:
shell: bash
env:
MIN_HATCH_VERSION: "1.11.0"
jobs:
job-setup:
name: Log Inputs
runs-on: ${{ vars.UBUNTU_LATEST }}
outputs:
starting_sha: ${{ steps.set_sha.outputs.starting_sha }}
use_hatch: ${{ steps.use_hatch.outputs.use_hatch }}
steps:
- name: "[DEBUG] Print Variables"
run: |
@@ -88,19 +91,29 @@ jobs:
echo Nightly release: ${{ inputs.nightly_release }}
echo Only Docker: ${{ inputs.only_docker }}
- name: "Checkout target branch"
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
ref: ${{ inputs.target_branch }}
# release-prep.yml really shouldn't take in the sha but since core + all adapters
# depend on it now this workaround lets us not input it manually with risk of error.
# The changes always get merged into the head so we can't use a specific commit for
# releases anyways.
- name: "Capture sha"
id: set_sha
# In version env.MIN_HATCH_VERSION we started to use hatch for build tooling. Before that we used setuptools.
# This needs to check if we're using hatch or setuptools based on the version being released. We should
# check if the version is greater than or equal to env.MIN_HATCH_VERSION. If it is, we use hatch, otherwise we use setuptools.
- name: "Check if using hatch"
id: use_hatch
run: |
echo "starting_sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
# Extract major.minor from versions like 1.11.0a1 -> 1.11
INPUT_MAJ_MIN=$(echo "${{ inputs.version_number }}" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
HATCH_MAJ_MIN=$(echo "${{ env.MIN_HATCH_VERSION }}" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
if [ $(echo "$INPUT_MAJ_MIN >= $HATCH_MAJ_MIN" | bc) -eq 1 ]; then
echo "use_hatch=true" >> $GITHUB_OUTPUT
else
echo "use_hatch=false" >> $GITHUB_OUTPUT
fi
- name: "Notify if using hatch"
run: |
if [ ${{ steps.use_hatch.outputs.use_hatch }} = "true" ]; then
echo "::notice title="Using Hatch": $title::Using Hatch for release"
else
echo "::notice title="Using Setuptools": $title::Using Setuptools for release"
fi
bump-version-generate-changelog:
name: Bump package version, Generate changelog
@@ -110,12 +123,13 @@ jobs:
uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main
with:
sha: ${{ needs.job-setup.outputs.starting_sha }}
version_number: ${{ inputs.version_number }}
hatch_directory: "core"
target_branch: ${{ inputs.target_branch }}
env_setup_script_path: "scripts/env-setup.sh"
test_run: ${{ inputs.test_run }}
nightly_release: ${{ inputs.nightly_release }}
use_hatch: ${{ needs.job-setup.outputs.use_hatch == 'true' }} # workflow outputs are strings...
secrets: inherit
@@ -143,16 +157,13 @@ jobs:
with:
sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }}
version_number: ${{ inputs.version_number }}
hatch_directory: "core"
changelog_path: ${{ needs.bump-version-generate-changelog.outputs.changelog_path }}
build_script_path: "scripts/build-dist.sh"
s3_bucket_name: "core-team-artifacts"
package_test_command: "dbt --version"
test_run: ${{ inputs.test_run }}
nightly_release: ${{ inputs.nightly_release }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
use_hatch: ${{ needs.job-setup.outputs.use_hatch == 'true' }} # workflow outputs are strings...
github-release:
name: GitHub Release

View File

@@ -22,7 +22,7 @@ on:
target_branch:
description: "The branch to check against"
type: string
default: 'main'
default: "main"
required: true
# no special access is needed
@@ -48,8 +48,8 @@ jobs:
- name: Checkout dbt repo
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
path: ${{ env.DBT_REPO_DIRECTORY }}
ref: ${{ inputs.target_branch }}
path: ${{ env.DBT_REPO_DIRECTORY }}
ref: ${{ inputs.target_branch }}
- name: Check for changes in core/dbt/artifacts
# https://github.com/marketplace/actions/paths-changes-filter
@@ -72,18 +72,16 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with:
repository: dbt-labs/schemas.getdbt.com
ref: 'main'
ref: "main"
path: ${{ env.SCHEMA_REPO_DIRECTORY }}
- name: Generate current schema
if: steps.check_artifact_changes.outputs.artifacts_changed == 'true'
run: |
cd ${{ env.DBT_REPO_DIRECTORY }}
python3 -m venv env
source env/bin/activate
pip install --upgrade pip
pip install -r dev-requirements.txt -r editable-requirements.txt
python scripts/collect-artifact-schema.py --path ${{ env.LATEST_SCHEMA_PATH }}
cd ${{ env.DBT_REPO_DIRECTORY }}/core
pip install --upgrade pip hatch
hatch run setup
hatch run json-schema -- --path ${{ env.LATEST_SCHEMA_PATH }}
# Copy generated schema files into the schemas.getdbt.com repo
# Do a git diff to find any changes
@@ -99,5 +97,5 @@ jobs:
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # actions/upload-artifact@v4
if: ${{ failure() && steps.check_artifact_changes.outputs.artifacts_changed == 'true' }}
with:
name: 'schema_changes.txt'
path: '${{ env.SCHEMA_DIFF_ARTIFACT }}'
name: "schema_changes.txt"
path: "${{ env.SCHEMA_DIFF_ARTIFACT }}"

View File

@@ -102,12 +102,12 @@ jobs:
run: |
pip install --user --upgrade pip
pip --version
pip install tox
tox --version
pip install hatch
hatch --version
- name: Run postgres setup script
run: |
./test/setup_db.sh
./scripts/setup_db.sh
env:
PGHOST: localhost
PGPORT: 5432
@@ -123,7 +123,7 @@ jobs:
with:
timeout_minutes: 30
max_attempts: 3
command: tox -e integration -- -nauto
command: cd core && hatch run ci:integration-tests -- -nauto
env:
PYTEST_ADDOPTS: ${{ format('--splits {0} --group {1}', env.PYTHON_INTEGRATION_TEST_WORKERS, matrix.split-group) }}

View File

@@ -14,33 +14,33 @@ on:
workflow_dispatch:
inputs:
branch:
description: 'Branch to check out'
description: "Branch to check out"
type: string
required: true
default: 'main'
default: "main"
test_path:
description: 'Path to single test to run (ex: tests/functional/retry/test_retry.py::TestRetry::test_fail_fast)'
description: "Path to single test to run (ex: tests/functional/retry/test_retry.py::TestRetry::test_fail_fast)"
type: string
required: true
default: 'tests/functional/...'
default: "tests/functional/..."
python_version:
description: 'Version of Python to Test Against'
description: "Version of Python to Test Against"
type: choice
options:
- '3.10'
- '3.11'
- "3.10"
- "3.11"
os:
description: 'OS to run test in'
description: "OS to run test in"
type: choice
options:
- 'ubuntu-latest'
- 'macos-14'
- 'windows-latest'
- "ubuntu-latest"
- "macos-14"
- "windows-latest"
num_runs_per_batch:
description: 'Max number of times to run the test per batch. We always run 10 batches.'
description: "Max number of times to run the test per batch. We always run 10 batches."
type: number
required: true
default: '50'
default: "50"
permissions: read-all
@@ -90,12 +90,19 @@ jobs:
with:
python-version: "${{ inputs.python_version }}"
- name: "Install hatch"
run: python -m pip install --user --upgrade pip hatch
- name: "Setup Dev Environment"
run: make dev
run: |
cd core
hatch run setup
- name: "Set up postgres (linux)"
if: inputs.os == '${{ vars.UBUNTU_LATEST }}'
run: make setup-db
run: |
cd core
hatch run setup-db
# mac and windows don't use make due to limitations with docker with those runners in GitHub
- name: Set up postgres (macos)
@@ -104,7 +111,7 @@ jobs:
with:
timeout_minutes: 10
max_attempts: 3
command: ./test/setup_db.sh
command: ./scripts/setup_db.sh
- name: "Set up postgres (windows)"
if: inputs.os == 'windows-latest'
@@ -153,5 +160,5 @@ jobs:
- name: "Error for Failures"
if: ${{ steps.pytest.outputs.failure }}
run: |
echo "Batch ${{ matrix.batch }} failed ${{ steps.pytest.outputs.failure }} of ${{ inputs.num_runs_per_batch }} tests"
exit 1
echo "Batch ${{ matrix.batch }} failed ${{ steps.pytest.outputs.failure }} of ${{ inputs.num_runs_per_batch }} tests"
exit 1

2
.gitignore vendored
View File

@@ -15,6 +15,7 @@ build/
!core/dbt/docs/build
develop-eggs/
dist/
dist-*/
downloads/
eggs/
.eggs/
@@ -95,6 +96,7 @@ target/
# pycharm
.idea/
venv/
.venv*/
# AWS credentials
.aws/

View File

@@ -84,7 +84,7 @@ repos:
types: [python]
- id: no_versioned_artifact_resource_imports
name: no_versioned_artifact_resource_imports
entry: python custom-hooks/no_versioned_artifact_resource_imports.py
entry: python scripts/pre-commit-hooks/no_versioned_artifact_resource_imports.py
language: system
files: ^core/dbt/
types: [python]

View File

@@ -17,10 +17,6 @@ The main subdirectories of core/dbt:
- [`parser`](core/dbt/parser/README.md): Read project files, validate, construct python objects
- [`task`](core/dbt/task/README.md): Set forth the actions that dbt can perform when invoked
Legacy tests are found in the 'test' directory:
- [`unit tests`](core/dbt/test/unit/README.md): Unit tests
- [`integration tests`](core/dbt/test/integration/README.md): Integration tests
### Invoking dbt
The "tasks" map to top-level dbt commands. So `dbt run` => task.run.RunTask, etc. Some are more like abstract base classes (GraphRunnableTask, for example) but all the concrete types outside of task should map to tasks. Currently one executes at a time. The tasks kick off their “Runners” and those do execute in parallel. The parallelism is managed via a thread pool, in GraphRunnableTask.
@@ -45,10 +41,9 @@ The Postgres adapter code is the most central, and many of its implementations a
## Testing dbt
The [`test/`](test/) subdirectory includes unit and integration tests that run as continuous integration checks against open pull requests. Unit tests check mock inputs and outputs of specific python functions. Integration tests perform end-to-end dbt invocations against real adapters (Postgres, Redshift, Snowflake, BigQuery) and assert that the results match expectations. See [the contributing guide](CONTRIBUTING.md) for a step-by-step walkthrough of setting up a local development and testing environment.
The [`tests/`](tests/) subdirectory includes unit and functional tests that run as continuous integration checks against open pull requests. Unit tests check mock inputs and outputs of specific python functions. Functional tests perform end-to-end dbt invocations against real adapters (Postgres) and assert that the results match expectations. See [the contributing guide](CONTRIBUTING.md) for a step-by-step walkthrough of setting up a local development and testing environment.
## Everything else
- [docker](docker/): All dbt versions are published as Docker images on DockerHub. This subfolder contains the `Dockerfile` (constant) and `requirements.txt` (one for each version).
- [etc](etc/): Images for README
- [scripts](scripts/): Helper scripts for testing, releasing, and producing JSON schemas. These are not included in distributions of dbt, nor are they rigorously tested—they're just handy tools for the dbt maintainers :)

View File

@@ -20,9 +20,8 @@
- [Testing](#testing)
- [Initial setup](#initial-setup)
- [Test commands](#test-commands)
- [Makefile](#makefile)
- [Hatch scripts](#hatch-scripts)
- [`pre-commit`](#pre-commit)
- [`tox`](#tox)
- [`pytest`](#pytest)
- [Unit, Integration, Functional?](#unit-integration-functional)
- [Debugging](#debugging)
@@ -35,7 +34,7 @@
There are many ways to contribute to the ongoing development of `dbt-core`, such as by participating in discussions and issues. We encourage you to first read our higher-level document: ["Expectations for Open Source Contributors"](https://docs.getdbt.com/docs/contributing/oss-expectations).
The rest of this document serves as a more granular guide for contributing code changes to `dbt-core` (this repository). It is not intended as a guide for using `dbt-core`, and some pieces assume a level of familiarity with Python development (virtualenvs, `pip`, etc). Specific code snippets in this guide assume you are using macOS or Linux and are comfortable with the command line.
The rest of this document serves as a more granular guide for contributing code changes to `dbt-core` (this repository). It is not intended as a guide for using `dbt-core`, and some pieces assume a level of familiarity with Python development and package managers. Specific code snippets in this guide assume you are using macOS or Linux and are comfortable with the command line.
If you get stuck, we're happy to help! Drop us a line in the `#dbt-core-development` channel in the [dbt Community Slack](https://community.getdbt.com).
@@ -74,28 +73,22 @@ There are some tools that will be helpful to you in developing locally. While th
These are the tools used in `dbt-core` development and testing:
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.10, 3.11, 3.12, and 3.13
- [`hatch`](https://hatch.pypa.io/) for build backend, environment management, and running tests across Python versions (3.10, 3.11, 3.12, and 3.13)
- [`pytest`](https://docs.pytest.org/en/latest/) to define, discover, and run tests
- [`flake8`](https://flake8.pycqa.org/en/latest/) for code linting
- [`black`](https://github.com/psf/black) for code formatting
- [`mypy`](https://mypy.readthedocs.io/en/stable/) for static type checking
- [`pre-commit`](https://pre-commit.com) to easily run those checks
- [`changie`](https://changie.dev/) to create changelog entries, without merge conflicts
- [`make`](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) to run multiple setup or test steps in combination. Don't worry too much, nobody _really_ understands how `make` works, and our Makefile aims to be super simple.
- [GitHub Actions](https://github.com/features/actions) for automating tests and checks, once a PR is pushed to the `dbt-core` repository
A deep understanding of these tools is not required to effectively contribute to `dbt-core`, but we recommend checking out the attached documentation if you're interested in learning more about each one.
#### Virtual environments
We strongly recommend using virtual environments when developing code in `dbt-core`. We recommend creating this virtualenv
in the root of the `dbt-core` repository. To create a new virtualenv, run:
```sh
python3 -m venv env
source env/bin/activate
```
dbt-core uses [Hatch](https://hatch.pypa.io/) for dependency and environment management. Hatch automatically creates and manages isolated environments for development, testing, and building, so you don't need to manually create virtual environments.
This will create and activate a new Python virtual environment.
For more information on how Hatch manages environments, see the [Hatch environment documentation](https://hatch.pypa.io/latest/environment/).
#### Docker and `docker-compose`
@@ -114,22 +107,42 @@ brew install postgresql
### Installation
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt-core` (and its dependencies):
First make sure you have Python 3.10 or later installed. Ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `hatch`. Finally set up `dbt-core` for development:
```sh
make dev
cd core
hatch run setup
```
or, alternatively:
This will install all development dependencies and set up pre-commit hooks.
By default, hatch will use whatever Python version is active in your environment. To specify a particular Python version, set the `HATCH_PYTHON` environment variable:
```sh
pip install -r dev-requirements.txt -r editable-requirements.txt
pre-commit install
export HATCH_PYTHON=3.12
hatch env create
```
Or add it to your shell profile (e.g., `~/.zshrc` or `~/.bashrc`) for persistence.
When installed in this way, any changes you make to your local copy of the source code will be reflected immediately in your next `dbt` run.
#### Building dbt-core
dbt-core uses [Hatch](https://hatch.pypa.io/) (specifically `hatchling`) as its build backend. To build distribution packages:
```sh
cd core
hatch build
```
This will create both wheel (`.whl`) and source distribution (`.tar.gz`) files in the `dist/` directory.
The build configuration is defined in `core/pyproject.toml`. You can also use the standard `python -m build` command if you prefer.
### Running `dbt-core`
With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
Once you've run `hatch run setup`, the `dbt` command will be available in your PATH. You can verify this by running `which dbt`.
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as necessary to connect to your target databases. It may be a good idea to add a new profile pointing to a local Postgres instance, or a specific test sandbox within your data warehouse if appropriate. Make sure to create a profile before running integration tests.
@@ -147,45 +160,78 @@ Although `dbt-core` works with a number of different databases, you won't need t
Postgres offers the easiest way to test most `dbt-core` functionality today. They are the fastest to run, and the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
```sh
make setup-db
cd core
hatch run setup-db
```
or, alternatively:
Alternatively, you can run the setup commands directly:
```sh
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash scripts/setup_db.sh
```
### Test commands
There are a few methods for running tests locally.
#### Makefile
#### Hatch scripts
There are multiple targets in the Makefile to run common test suites and code
checks, most notably:
The primary way to run tests and checks is using hatch scripts (defined in `core/hatch.toml`):
```sh
# Runs unit tests with py38 and code checks in parallel.
make test
# Runs postgres integration tests with py38 in "fail fast" mode.
make integration
```
> These make targets assume you have a local installation of a recent version of [`tox`](https://tox.readthedocs.io/en/latest/) for unit/integration testing and pre-commit for code quality checks,
> unless you use choose a Docker container to run tests. Run `make help` for more info.
cd core
Check out the other targets in the Makefile to see other commonly used test
suites.
# Run all unit tests
hatch run unit-tests
# Run unit tests and all code quality checks
hatch run test
# Run integration tests
hatch run integration-tests
# Run integration tests in fail-fast mode
hatch run integration-tests-fail-fast
# Run linting checks only
hatch run lint
hatch run flake8
hatch run mypy
hatch run black
# Run all pre-commit hooks
hatch run code-quality
# Clean build artifacts
hatch run clean
```
Hatch manages isolated environments and dependencies automatically. The commands above use the `default` environment which is recommended for most local development.
**Using the `ci` environment (optional)**
If you need to replicate exactly what runs in GitHub Actions (e.g., with coverage reporting), use the `ci` environment:
```sh
cd core
# Run unit tests with coverage
hatch run ci:unit-tests
# Run unit tests with a specific Python version
hatch run +py=3.11 ci:unit-tests
```
> **Note:** Most developers should use the default environment (`hatch run unit-tests`). The `ci` environment is primarily for debugging CI failures or running tests with coverage.
#### `pre-commit`
[`pre-commit`](https://pre-commit.com) takes care of running all code-checks for formatting and linting. Run `make dev` to install `pre-commit` in your local environment (we recommend running this command with a python virtual environment active). This command installs several pip executables including black, mypy, and flake8. Once this is done you can use any of the linter-based make targets as well as a git pre-commit hook that will ensure proper formatting and linting.
#### `tox`
[`tox`](https://tox.readthedocs.io/en/latest/) takes care of managing virtualenvs and install dependencies in order to run tests. You can also run tests in parallel, for example, you can run unit tests for Python 3.8, Python 3.9, Python 3.10 and Python 3.11 checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration for these tests in located in `tox.ini`.
[`pre-commit`](https://pre-commit.com) takes care of running all code-checks for formatting and linting. Run `hatch run setup` to install `pre-commit` in your local environment (we recommend running this command with a python virtual environment active). This installs several pip executables including black, mypy, and flake8. Once installed, hooks will run automatically on `git commit`, or you can run them manually with `hatch run code-quality`.
#### `pytest`
Finally, you can also run a specific test or group of tests using [`pytest`](https://docs.pytest.org/en/latest/) directly. With a virtualenv active and dev dependencies installed you can do things like:
Finally, you can also run a specific test or group of tests using [`pytest`](https://docs.pytest.org/en/latest/) directly. After running `hatch run setup`, you can run pytest commands like:
```sh
# run all unit tests in a file

View File

@@ -47,7 +47,7 @@ RUN curl -LO https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_V
&& tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
&& rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
RUN pip3 install -U tox wheel six setuptools pre-commit
RUN pip3 install -U hatch wheel pre-commit
# These args are passed in via docker-compose, which reads them from the .env file.
# On Linux, run `make .env` to create the .env file for the current user.
@@ -62,7 +62,6 @@ RUN if [ ${USER_ID:-0} -ne 0 ] && [ ${GROUP_ID:-0} -ne 0 ]; then \
useradd -mU -l dbt_test_user; \
fi
RUN mkdir /usr/app && chown dbt_test_user /usr/app
RUN mkdir /home/tox && chown dbt_test_user /home/tox
WORKDIR /usr/app
VOLUME /usr/app

1
LICENSE Symbolic link
View File

@@ -0,0 +1 @@
core/LICENSE

163
Makefile
View File

@@ -1,146 +1,95 @@
# ============================================================================
# DEPRECATED: This Makefile is maintained for backwards compatibility only.
#
# dbt-core now uses Hatch for task management and development workflows.
# Please migrate to using hatch commands directly:
#
# make dev → cd core && hatch run setup
# make unit → cd core && hatch run unit-tests
# make test → cd core && hatch run test
# make integration → cd core && hatch run integration-tests
# make lint → cd core && hatch run lint
# make code_quality → cd core && hatch run code-quality
# make setup-db → cd core && hatch run setup-db
# make clean → cd core && hatch run clean
#
# See core/pyproject.toml [tool.hatch.envs.default.scripts] for all available
# commands and CONTRIBUTING.md for detailed usage instructions.
#
# This Makefile will be removed in a future version of dbt-core.
# ============================================================================
.DEFAULT_GOAL:=help
# Optional flag to run target in a docker container.
# (example `make test USE_DOCKER=true`)
ifeq ($(USE_DOCKER),true)
DOCKER_CMD := docker-compose run --rm test
endif
#
# To override CI_flags, create a file at this repo's root dir named `makefile.test.env`. Fill it
# with any ENV_VAR overrides required by your test environment, e.g.
# DBT_TEST_USER_1=user
# LOG_DIR="dir with a space in it"
#
# Warn: Restrict each line to one variable only.
#
ifeq (./makefile.test.env,$(wildcard ./makefile.test.env))
include ./makefile.test.env
endif
CI_FLAGS =\
DBT_TEST_USER_1=$(if $(DBT_TEST_USER_1),$(DBT_TEST_USER_1),dbt_test_user_1)\
DBT_TEST_USER_2=$(if $(DBT_TEST_USER_2),$(DBT_TEST_USER_2),dbt_test_user_2)\
DBT_TEST_USER_3=$(if $(DBT_TEST_USER_3),$(DBT_TEST_USER_3),dbt_test_user_3)\
RUSTFLAGS=$(if $(RUSTFLAGS),$(RUSTFLAGS),"-D warnings")\
LOG_DIR=$(if $(LOG_DIR),$(LOG_DIR),./logs)\
DBT_LOG_FORMAT=$(if $(DBT_LOG_FORMAT),$(DBT_LOG_FORMAT),json)
.PHONY: dev_req
dev_req: ## Installs dbt-* packages in develop mode along with only development dependencies.
@\
pip install -r dev-requirements.txt -r editable-requirements.txt
@cd core && hatch run dev-req
.PHONY: dev
dev: dev_req ## Installs dbt-* packages in develop mode along with development dependencies and pre-commit.
@\
$(DOCKER_CMD) pre-commit install
dev: ## Installs dbt-* packages in develop mode along with development dependencies and pre-commit.
@cd core && hatch run setup
.PHONY: dev-uninstall
dev-uninstall: ## Uninstall all packages in venv except for build tools
@\
pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y; \
pip uninstall -y dbt-core
@pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y; \
pip uninstall -y dbt-core
.PHONY: mypy
mypy: .env ## Runs mypy against staged changes for static type checking.
@\
$(DOCKER_CMD) pre-commit run --hook-stage manual mypy-check | grep -v "INFO"
mypy: ## Runs mypy against staged changes for static type checking.
@cd core && hatch run mypy
.PHONY: flake8
flake8: .env ## Runs flake8 against staged changes to enforce style guide.
@\
$(DOCKER_CMD) pre-commit run --hook-stage manual flake8-check | grep -v "INFO"
flake8: ## Runs flake8 against staged changes to enforce style guide.
@cd core && hatch run flake8
.PHONY: black
black: .env ## Runs black against staged changes to enforce style guide.
@\
$(DOCKER_CMD) pre-commit run --hook-stage manual black-check -v | grep -v "INFO"
black: ## Runs black against staged changes to enforce style guide.
@cd core && hatch run black
.PHONY: lint
lint: .env ## Runs flake8 and mypy code checks against staged changes.
@\
$(DOCKER_CMD) pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
$(DOCKER_CMD) pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
lint: ## Runs flake8 and mypy code checks against staged changes.
@cd core && hatch run lint
.PHONY: code_quality
code_quality: ## Runs all pre-commit hooks against all files.
@cd core && hatch run code-quality
.PHONY: unit
unit: .env ## Runs unit tests with py
@\
$(DOCKER_CMD) tox -e py
unit: ## Runs unit tests with py
@cd core && hatch run unit-tests
.PHONY: test
test: .env ## Runs unit tests with py and code checks against staged changes.
@\
$(DOCKER_CMD) tox -e py; \
$(DOCKER_CMD) pre-commit run black-check --hook-stage manual | grep -v "INFO"; \
$(DOCKER_CMD) pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \
$(DOCKER_CMD) pre-commit run mypy-check --hook-stage manual | grep -v "INFO"
test: ## Runs unit tests with py and code checks against staged changes.
@cd core && hatch run test
.PHONY: integration
integration: .env ## Runs core integration tests using postgres with py-integration
@\
$(CI_FLAGS) $(DOCKER_CMD) tox -e py-integration -- -nauto
integration: ## Runs core integration tests using postgres with py-integration
@cd core && hatch run integration-tests
.PHONY: integration-fail-fast
integration-fail-fast: .env ## Runs core integration tests using postgres with py-integration in "fail fast" mode.
@\
$(DOCKER_CMD) tox -e py-integration -- -x -nauto
.PHONY: interop
interop: clean
@\
mkdir $(LOG_DIR) && \
$(CI_FLAGS) $(DOCKER_CMD) tox -e py-integration -- -nauto && \
LOG_DIR=$(LOG_DIR) cargo run --manifest-path test/interop/log_parsing/Cargo.toml
integration-fail-fast: ## Runs core integration tests using postgres with py-integration in "fail fast" mode.
@cd core && hatch run integration-tests-fail-fast
.PHONY: setup-db
setup-db: ## Setup Postgres database with docker-compose for system testing.
@\
docker compose up -d database && \
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres SKIP_HOMEBREW=true bash test/setup_db.sh
# This rule creates a file named .env that is used by docker-compose for passing
# the USER_ID and GROUP_ID arguments to the Docker image.
.env: ## Setup step for using using docker-compose with make target.
@touch .env
ifneq ($(OS),Windows_NT)
ifneq ($(shell uname -s), Darwin)
@echo USER_ID=$(shell id -u) > .env
@echo GROUP_ID=$(shell id -g) >> .env
endif
endif
@cd core && hatch run setup-db
.PHONY: clean
clean: ## Resets development environment.
@echo 'cleaning repo...'
@rm -f .coverage
@rm -f .coverage.*
@rm -rf .eggs/
@rm -f .env
@rm -rf .tox/
@rm -rf build/
@rm -rf dbt.egg-info/
@rm -f dbt_project.yml
@rm -rf dist/
@rm -f htmlcov/*.{css,html,js,json,png}
@rm -rf logs/
@rm -rf target/
@find . -type f -name '*.pyc' -delete
@find . -type d -name '__pycache__' -depth -delete
@echo 'done.'
@cd core && hatch run clean
.PHONY: json_schema
json_schema: ## Update generated JSON schema using code changes.
@cd core && hatch run json-schema
.PHONY: help
help: ## Show this help message.
@echo 'usage: make [target] [USE_DOCKER=true]'
@echo 'usage: make [target]'
@echo
@echo 'DEPRECATED: This Makefile is a compatibility shim.'
@echo 'Please use "cd core && hatch run <command>" directly.'
@echo
@echo 'targets:'
@grep -E '^[8+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
@echo
@echo 'options:'
@echo 'use USE_DOCKER=true to run target in a docker container'
.PHONY: json_schema
json_schema: ## Update generated JSON schema using code changes.
scripts/collect-artifact-schema.py --path schemas
@echo 'For more information, see CONTRIBUTING.md'

View File

@@ -1,4 +0,0 @@
recursive-include dbt/include *.py *.sql *.yml *.html *.md .gitkeep .gitignore
include dbt/py.typed
recursive-include dbt/task/docs *.html
recursive-include dbt/jsonschemas *.json

View File

@@ -1,5 +1,5 @@
<p align="center">
<img src="https://raw.githubusercontent.com/dbt-labs/dbt-core/fa1ea14ddfb1d5ae319d5141844910dd53ab2834/etc/dbt-core.svg" alt="dbt logo" width="750"/>
<img src="https://raw.githubusercontent.com/dbt-labs/dbt-core/fa1ea14ddfb1d5ae319d5141844910dd53ab2834/docs/images/dbt-core.svg" alt="dbt logo" width="750"/>
</p>
<p align="center">
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml">
@@ -9,7 +9,7 @@
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
![architecture](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-arch.png)
![architecture](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/docs/images/dbt-arch.png)
## Understanding dbt
@@ -17,7 +17,7 @@ Analysts using dbt can transform their data by simply writing select statements,
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
![dbt dag](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-dag.png)
![dbt dag](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/docs/images/dbt-dag.png)
## Getting started

1
core/dbt/__version__.py Normal file
View File

@@ -0,0 +1 @@
version = "1.12.0a1"

View File

@@ -2,11 +2,13 @@ import inspect
import typing as t
import click
from click import Context
from click.parser import _OptionParser, _ParsingState
from dbt.cli.option_types import ChoiceTuple
if t.TYPE_CHECKING:
from click import Context
from click.parser import _OptionParser, _ParsingState
# Implementation from: https://stackoverflow.com/a/48394004
# Note MultiOption options must be specified with type=tuple or type=ChoiceTuple (https://github.com/pallets/click/issues/2012)
@@ -33,8 +35,8 @@ class MultiOption(click.Option):
else:
assert isinstance(option_type, ChoiceTuple), msg
def add_to_parser(self, parser: _OptionParser, ctx: Context):
def parser_process(value: str, state: _ParsingState):
def add_to_parser(self, parser: "_OptionParser", ctx: "Context"):
def parser_process(value: str, state: "_ParsingState"):
# method to hook to the parser.process
done = False
value_list = str.split(value, " ")
@@ -65,7 +67,7 @@ class MultiOption(click.Option):
break
return retval
def type_cast_value(self, ctx: Context, value: t.Any) -> t.Any:
def type_cast_value(self, ctx: "Context", value: t.Any) -> t.Any:
def flatten(data):
if isinstance(data, tuple):
for x in data:

View File

@@ -446,3 +446,5 @@ def setup_manifest(ctx: Context, write: bool = True, write_perf_info: bool = Fal
adapter.set_macro_resolver(ctx.obj["manifest"])
query_header_context = generate_query_header_context(adapter.config, ctx.obj["manifest"]) # type: ignore[attr-defined]
adapter.connections.set_query_header(query_header_context)
for integration in active_integrations:
adapter.add_catalog_integration(integration)

View File

@@ -544,9 +544,15 @@ class ParseConfigObject(Config):
def require(self, name, validator=None):
    """Parse-time stand-in for a required config lookup.

    Neither ``name`` nor ``validator`` is consulted; the result is always
    an empty string.
    """
    return ""
def meta_require(self, name, validator=None):
    """Parse-time stand-in for a required ``meta`` lookup.

    Neither ``name`` nor ``validator`` is consulted; the result is always
    an empty string.
    """
    return ""
def get(self, name, default=None, validator=None):
    """Parse-time stand-in for an optional config lookup.

    ``name``, ``default`` and ``validator`` are all ignored; the result is
    always an empty string (not ``default``).
    """
    return ""
def meta_get(self, name, default=None, validator=None):
    """Parse-time stand-in for an optional ``meta`` lookup.

    ``name``, ``default`` and ``validator`` are all ignored; the result is
    always an empty string (not ``default``).
    """
    return ""
def persist_relation_docs(self) -> bool:
    """Parse-time answer for relation-docs persistence: always ``False``."""
    return False
@@ -578,6 +584,16 @@ class RuntimeConfigObject(Config):
raise MissingConfigError(unique_id=self.model.unique_id, name=name)
return result
def _lookup_meta(self, name, default=_MISSING):
    """Resolve ``name`` through the model config's ``meta_get``.

    When ``self.model`` has no ``config`` attribute (this can happen for a
    macro), ``default`` is used as the result. If the resolved value is the
    ``_MISSING`` sentinel, a ``MissingConfigError`` is raised for this
    model's ``unique_id`` and ``name``.
    """
    if hasattr(self.model, "config"):
        result = self.model.config.meta_get(name, default)
    else:
        # No `model.config` to consult, so fall back to the caller's default.
        result = default

    if result is _MISSING:
        raise MissingConfigError(unique_id=self.model.unique_id, name=name)
    return result
def require(self, name, validator=None):
to_return = self._lookup(name)
@@ -586,6 +602,12 @@ class RuntimeConfigObject(Config):
return to_return
def meta_require(self, name, validator=None):
    """Return the required ``meta`` value stored under ``name``.

    The lookup is delegated to ``self._lookup_meta(name)`` with no default,
    so any error that helper raises for an unresolved key propagates to the
    caller.

    Args:
        name: Key to look up.
        validator: Optional validator; when given, it is applied to the
            resolved value via ``self._validate``.

    Returns:
        The resolved value.
    """
    to_return = self._lookup_meta(name)

    if validator is not None:
        self._validate(validator, to_return)

    # Hand the value back to the caller, matching `require`, `get` and
    # `meta_get`; without this the method would always evaluate to None.
    return to_return
def get(self, name, default=None, validator=None):
to_return = self._lookup(name, default)
@@ -594,6 +616,14 @@ class RuntimeConfigObject(Config):
return to_return
def meta_get(self, name, default=None, validator=None):
    """Return the ``meta`` value for ``name``, or ``default``.

    The lookup goes through ``self._lookup_meta(name, default)``. The value
    is passed to ``self._validate`` only when both a ``validator`` and a
    non-``None`` ``default`` were supplied.
    """
    value = self._lookup_meta(name, default)

    should_validate = validator is not None and default is not None
    if should_validate:
        self._validate(validator, value)

    return value
def persist_relation_docs(self) -> bool:
persist_docs = self.get("persist_docs", default={})
if not isinstance(persist_docs, dict):

View File

@@ -16,15 +16,14 @@ from dbt_common.events.format import (
pluralize,
timestamp_to_datetime_string,
)
from dbt_common.ui import (
deprecation_tag,
error_tag,
green,
line_wrap_message,
red,
warning_tag,
yellow,
)
from dbt_common.ui import deprecation_tag as deprecation_tag_less_strict
from dbt_common.ui import error_tag, green, line_wrap_message, red, warning_tag, yellow
# Strict wrapper: `event_name` is a required parameter here, so mypy will
# complain if a deprecation tag is built without an event name.
def _deprecation_tag(description: str, event_name: str) -> str:
    """Tag ``description`` as a deprecation, always passing ``event_name``.

    Forwards both arguments unchanged to ``deprecation_tag_less_strict``
    and returns its result.
    """
    tagged = deprecation_tag_less_strict(description, event_name)
    return tagged
# Event codes have prefixes which follow this table
#
@@ -260,7 +259,7 @@ class DeprecatedModel(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(msg, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(msg, self.__class__.__name__))
else:
return warning_tag(msg)
@@ -276,9 +275,9 @@ class PackageRedirectDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class PackageInstallPathDeprecation(WarnLevel):
@@ -293,9 +292,9 @@ class PackageInstallPathDeprecation(WarnLevel):
"""
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class ConfigSourcePathDeprecation(WarnLevel):
@@ -309,9 +308,9 @@ class ConfigSourcePathDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class ConfigDataPathDeprecation(WarnLevel):
@@ -325,9 +324,9 @@ class ConfigDataPathDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class MetricAttributesRenamed(WarnLevel):
@@ -345,9 +344,9 @@ class MetricAttributesRenamed(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return deprecation_tag(description)
return deprecation_tag_less_strict(description)
class ExposureNameDeprecation(WarnLevel):
@@ -364,9 +363,9 @@ class ExposureNameDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class InternalDeprecation(WarnLevel):
@@ -383,7 +382,7 @@ class InternalDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return deprecation_tag(msg, self.__class__.__name__)
return _deprecation_tag(msg, self.__class__.__name__)
else:
return warning_tag(msg)
@@ -401,9 +400,9 @@ class EnvironmentVariableRenamed(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class ConfigLogPathDeprecation(WarnLevel):
@@ -422,9 +421,9 @@ class ConfigLogPathDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class ConfigTargetPathDeprecation(WarnLevel):
@@ -443,9 +442,9 @@ class ConfigTargetPathDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
# Note: this deprecation has been removed, but we are leaving
@@ -462,9 +461,9 @@ class TestsConfigDeprecation(WarnLevel):
)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(deprecation_tag_less_strict(description))
class ProjectFlagsMovedDeprecation(WarnLevel):
@@ -478,9 +477,9 @@ class ProjectFlagsMovedDeprecation(WarnLevel):
)
# Can't use line_wrap_message here because flags.printer_width isn't available yet
if require_event_names_in_deprecations():
return deprecation_tag(description, self.__class__.__name__)
return _deprecation_tag(description, self.__class__.__name__)
else:
return deprecation_tag(description)
return deprecation_tag_less_strict(description)
class SpacesInResourceNameDeprecation(DynamicLevel):
@@ -496,7 +495,7 @@ class SpacesInResourceNameDeprecation(DynamicLevel):
description = warning_tag(description)
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(description)
@@ -514,7 +513,7 @@ class ResourceNamesWithSpacesDeprecation(WarnLevel):
description += " For more information: https://docs.getdbt.com/reference/global-configs/legacy-behaviors"
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -527,7 +526,7 @@ class PackageMaterializationOverrideDeprecation(WarnLevel):
description = f"Installed package '{self.package_name}' is overriding the built-in materialization '{self.materialization_name}'. Overrides of built-in materializations from installed packages will be deprecated in future versions of dbt. For more information: https://docs.getdbt.com/reference/global-configs/legacy-behaviors"
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -540,7 +539,7 @@ class SourceFreshnessProjectHooksNotRun(WarnLevel):
description = "In a future version of dbt, the `source freshness` command will start running `on-run-start` and `on-run-end` hooks by default. For more information: https://docs.getdbt.com/reference/global-configs/legacy-behaviors"
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -553,7 +552,7 @@ class MFTimespineWithoutYamlConfigurationDeprecation(WarnLevel):
description = "Time spines without YAML configuration are in the process of deprecation. Please add YAML configuration for your 'metricflow_time_spine' model. See documentation on MetricFlow time spines: https://docs.getdbt.com/docs/build/metricflow-time-spine and behavior change documentation: https://docs.getdbt.com/reference/global-configs/behavior-changes."
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -566,7 +565,7 @@ class MFCumulativeTypeParamsDeprecation(WarnLevel):
description = "Cumulative fields `type_params.window` and `type_params.grain_to_date` have been moved and will soon be deprecated. Please nest those values under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes."
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -579,7 +578,7 @@ class MicrobatchMacroOutsideOfBatchesDeprecation(WarnLevel):
description = "The use of a custom microbatch macro outside of batched execution is deprecated. To use it with batched execution, set `flags.require_batched_execution_for_custom_microbatch_strategy` to `True` in `dbt_project.yml`. In the future this will be the default behavior."
if require_event_names_in_deprecations():
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
else:
return line_wrap_message(warning_tag(description))
@@ -599,7 +598,7 @@ class GenericJSONSchemaValidationDeprecation(WarnLevel):
else:
description = f"{self.violation} in file `{self.file}` at path `{self.key_path}` is possibly a deprecation. {possible_causes}"
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class UnexpectedJinjaBlockDeprecation(WarnLevel):
@@ -608,7 +607,7 @@ class UnexpectedJinjaBlockDeprecation(WarnLevel):
def message(self) -> str:
description = f"{self.msg} in file `{self.file}`"
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class DuplicateYAMLKeysDeprecation(WarnLevel):
@@ -617,7 +616,7 @@ class DuplicateYAMLKeysDeprecation(WarnLevel):
def message(self) -> str:
description = f"{self.duplicate_description} in file `{self.file}`"
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class CustomTopLevelKeyDeprecation(WarnLevel):
@@ -626,7 +625,7 @@ class CustomTopLevelKeyDeprecation(WarnLevel):
def message(self) -> str:
description = f"{self.msg} in file `{self.file}`"
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class CustomKeyInConfigDeprecation(WarnLevel):
@@ -639,7 +638,7 @@ class CustomKeyInConfigDeprecation(WarnLevel):
path_specification = f" at path `{self.key_path}`"
description = f"Custom key `{self.key}` found in `config`{path_specification} in file `{self.file}`. Custom config keys should move into the `config.meta`."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class CustomKeyInObjectDeprecation(WarnLevel):
@@ -648,7 +647,7 @@ class CustomKeyInObjectDeprecation(WarnLevel):
def message(self) -> str:
description = f"Custom key `{self.key}` found at `{self.key_path}` in file `{self.file}`. This may mean the key is a typo, or is simply not a key supported by the object."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class DeprecationsSummary(WarnLevel):
@@ -665,7 +664,7 @@ class DeprecationsSummary(WarnLevel):
if self.show_all_hint:
description += "\n\nTo see all deprecation instances instead of just the first occurrence of each, run command again with the `--show-all-deprecations` flag. You may also need to run with `--no-partial-parse` as some deprecations are only encountered during parsing."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class CustomOutputPathInSourceFreshnessDeprecation(WarnLevel):
@@ -674,7 +673,7 @@ class CustomOutputPathInSourceFreshnessDeprecation(WarnLevel):
def message(self) -> str:
description = f"Custom output path usage `--output {self.path}` usage detected in `dbt source freshness` command."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class PropertyMovedToConfigDeprecation(WarnLevel):
@@ -683,7 +682,7 @@ class PropertyMovedToConfigDeprecation(WarnLevel):
def message(self) -> str:
description = f"Found `{self.key}` as a top-level property of `{self.key_path}` in file `{self.file}`. The `{self.key}` top-level property should be moved into the `config` of `{self.key_path}`."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class WEOIncludeExcludeDeprecation(WarnLevel):
@@ -703,7 +702,7 @@ class WEOIncludeExcludeDeprecation(WarnLevel):
if self.found_exclude:
description += " Please use `warn` instead of `exclude`."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class ModelParamUsageDeprecation(WarnLevel):
@@ -712,7 +711,7 @@ class ModelParamUsageDeprecation(WarnLevel):
def message(self) -> str:
description = "Usage of `--models`, `--model`, and `-m` is deprecated in favor of `--select` or `-s`."
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class ModulesItertoolsUsageDeprecation(WarnLevel):
@@ -723,7 +722,7 @@ class ModulesItertoolsUsageDeprecation(WarnLevel):
description = (
"Usage of itertools modules is deprecated. Please use the built-in functions instead."
)
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class SourceOverrideDeprecation(WarnLevel):
@@ -732,7 +731,7 @@ class SourceOverrideDeprecation(WarnLevel):
def message(self) -> str:
description = f"The source property `overrides` is deprecated but was found on source `{self.source_name}` in file `{self.file}`. Instead, `enabled` should be used to disable the unwanted source."
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class EnvironmentVariableNamespaceDeprecation(WarnLevel):
@@ -741,7 +740,7 @@ class EnvironmentVariableNamespaceDeprecation(WarnLevel):
def message(self) -> str:
description = f"Found custom environment variable `{self.env_var}` in the environment. The prefix `{self.reserved_prefix}` is reserved for dbt engine environment variables. Custom environment variables with the prefix `{self.reserved_prefix}` may cause collisions and runtime errors."
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class MissingPlusPrefixDeprecation(WarnLevel):
@@ -750,7 +749,7 @@ class MissingPlusPrefixDeprecation(WarnLevel):
def message(self) -> str:
description = f"Missing '+' prefix on `{self.key}` found at `{self.key_path}` in file `{self.file}`. Hierarchical config values without a '+' prefix are deprecated in dbt_project.yml."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class ArgumentsPropertyInGenericTestDeprecation(WarnLevel):
@@ -759,7 +758,7 @@ class ArgumentsPropertyInGenericTestDeprecation(WarnLevel):
def message(self) -> str:
description = f"Found `arguments` property in test definition of {self.test_name} without usage of `require_generic_test_arguments_property` behavior change flag. The `arguments` property is deprecated for custom usage and will be used to nest keyword arguments in future versions of dbt."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class MissingArgumentsPropertyInGenericTestDeprecation(WarnLevel):
@@ -768,7 +767,7 @@ class MissingArgumentsPropertyInGenericTestDeprecation(WarnLevel):
def message(self) -> str:
description = f"Found top-level arguments to test {self.test_name}. Arguments to generic tests should be nested under the `arguments` property."
return line_wrap_message(deprecation_tag(description, self.__class__.__name__))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
class DuplicateNameDistinctNodeTypesDeprecation(WarnLevel):
@@ -777,7 +776,7 @@ class DuplicateNameDistinctNodeTypesDeprecation(WarnLevel):
def message(self) -> str:
description = f"Found resources with the same name '{self.resource_name}' in package '{self.package_name}': '{self.unique_id1}' and '{self.unique_id2}'. Please update one of the resources to have a unique name."
return line_wrap_message(deprecation_tag(description))
return line_wrap_message(_deprecation_tag(description, self.__class__.__name__))
# =======================================================

View File

@@ -1,5 +1,4 @@
import json
import os
import re
from datetime import date, datetime
from pathlib import Path
@@ -141,9 +140,6 @@ def _get_allowed_config_fields_from_error_path(
def _can_run_validations() -> bool:
if not os.environ.get("DBT_ENV_PRIVATE_RUN_JSONSCHEMA_VALIDATIONS"):
return False
invocation_context = get_invocation_context()
return invocation_context.adapter_types.issubset(_JSONSCHEMA_SUPPORTED_ADAPTERS)
@@ -269,6 +265,11 @@ def validate_model_config(config: Dict[str, Any], file_path: str) -> None:
if len(error.path) == 0:
key_path = error_path_to_string(error)
for key in keys:
# Special case for pre/post hook keys as they are updated during config parsing
# from the user-provided pre_hook/post_hook to pre-hook/post-hook keys.
# Avoids false positives as described in https://github.com/dbt-labs/dbt-core/issues/12087
if key in ("post-hook", "pre-hook"):
continue
deprecations.warn(
"custom-key-in-config-deprecation",
key=key,

View File

@@ -635,23 +635,24 @@ class ManifestLoader:
def check_for_spaces_in_resource_names(self):
"""Validates that resource names do not contain spaces
If `DEBUG` flag is `False`, logs only first bad model name
If `DEBUG` flag is `False`, logs only first bad model name, unless `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `True` as error will indicate all bad model names
If `DEBUG` flag is `True`, logs every bad model name
If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `True`, logs are `ERROR` level and an exception is raised if any names are bad
If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `False`, logs are `WARN` level
"""
improper_resource_names = 0
level = (
EventLevel.ERROR
if self.root_project.args.REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES
else EventLevel.WARN
improper_resource_names_unique_ids = set()
error_on_invalid_resource_name = (
self.root_project.args.REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES
)
level = EventLevel.ERROR if error_on_invalid_resource_name else EventLevel.WARN
flags = get_flags()
for node in self.manifest.nodes.values():
if " " in node.name:
if improper_resource_names == 0 or flags.DEBUG:
if (
not improper_resource_names_unique_ids and not error_on_invalid_resource_name
) or flags.DEBUG:
fire_event(
SpacesInResourceNameDeprecation(
unique_id=node.unique_id,
@@ -659,17 +660,23 @@ class ManifestLoader:
),
level=level,
)
improper_resource_names += 1
improper_resource_names_unique_ids.add(node.unique_id)
if improper_resource_names > 0:
if improper_resource_names_unique_ids:
if level == EventLevel.WARN:
dbt.deprecations.warn(
"resource-names-with-spaces",
count_invalid_names=improper_resource_names,
count_invalid_names=len(improper_resource_names_unique_ids),
show_debug_hint=(not flags.DEBUG),
)
else: # ERROR level
raise DbtValidationError("Resource names cannot contain spaces")
formatted_resources_with_spaces = "\n".join(
f" * '{unique_id}' ({self.manifest.nodes[unique_id].original_file_path})"
for unique_id in improper_resource_names_unique_ids
)
raise DbtValidationError(
f"Resource names cannot contain spaces:\n{formatted_resources_with_spaces}\nPlease rename the invalid model(s) so that their name(s) do not contain any spaces."
)
def check_for_microbatch_deprecations(self) -> None:
if not get_flags().require_batched_execution_for_custom_microbatch_strategy:

View File

@@ -11,7 +11,13 @@ from dbt.contracts.files import (
parse_file_type_to_parser,
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import AnalysisNode, ModelNode, SeedNode, SnapshotNode
from dbt.contracts.graph.nodes import (
AnalysisNode,
GenericTestNode,
ModelNode,
SeedNode,
SnapshotNode,
)
from dbt.events.types import PartialParsingEnabled, PartialParsingFile
from dbt.node_types import NodeType
from dbt_common.context import get_invocation_context
@@ -970,7 +976,7 @@ class PartialParsing:
for child_id in self.saved_manifest.child_map[unique_id]:
if child_id.startswith("test") and child_id in self.saved_manifest.nodes:
child_test = self.saved_manifest.nodes[child_id]
if child_test.attached_node:
if isinstance(child_test, GenericTestNode) and child_test.attached_node:
if child_test.attached_node in self.saved_manifest.nodes:
attached_node = self.saved_manifest.nodes[child_test.attached_node]
self.update_in_saved(attached_node.file_id)

View File

@@ -290,8 +290,11 @@ class UnitTestParser(YamlReader):
)
if tested_model_node:
unit_test_definition.depends_on.nodes.append(tested_model_node.unique_id)
unit_test_definition.schema = tested_model_node.schema
if tested_model_node.config.enabled:
unit_test_definition.depends_on.nodes.append(tested_model_node.unique_id)
unit_test_definition.schema = tested_model_node.schema
else:
unit_test_definition.config.enabled = False
# Check that format and type of rows matches for each given input,
# convert rows to a list of dictionaries, and add the unique_id of
@@ -302,7 +305,7 @@ class UnitTestParser(YamlReader):
# for calculating state:modified
unit_test_definition.build_unit_test_checksum()
assert isinstance(self.yaml.file, SchemaSourceFile)
if unit_test_config.enabled:
if unit_test_definition.config.enabled:
self.manifest.add_unit_test(self.yaml.file, unit_test_definition)
else:
self.manifest.add_disabled(self.yaml.file, unit_test_definition)
@@ -492,6 +495,13 @@ def find_tested_model_node(
model_version = model_name_split[1] if len(model_name_split) == 2 else None
tested_node = manifest.ref_lookup.find(model_name, current_project, model_version, manifest)
if not tested_node:
disabled_node = manifest.disabled_lookup.find(
model_name, current_project, model_version, [NodeType.Model]
)
if disabled_node:
tested_node = disabled_node[0]
return tested_node
@@ -509,22 +519,36 @@ def process_models_for_unit_test(
f"Unable to find model '{current_project}.{unit_test_def.model}' for "
f"unit test '{unit_test_def.name}' in {unit_test_def.original_file_path}"
)
unit_test_def.depends_on.nodes.append(tested_node.unique_id)
unit_test_def.schema = tested_node.schema
if tested_node.config.enabled:
unit_test_def.depends_on.nodes.append(tested_node.unique_id)
unit_test_def.schema = tested_node.schema
else:
# If the model is disabled, the unit test should be disabled
unit_test_def.config.enabled = False
# The UnitTestDefinition should only have one "depends_on" at this point,
# the one that's found by the "model" field.
target_model_id = unit_test_def.depends_on.nodes[0]
if target_model_id not in manifest.nodes:
if target_model_id in manifest.disabled:
# The model is disabled, so we don't need to do anything (#10540)
return
# If the model is disabled, the unit test should be disabled
unit_test_def.config.enabled = False
else:
# If we've reached here and the model is not disabled, throw an error
raise ParsingError(
f"Unit test '{unit_test_def.name}' references a model that does not exist: {target_model_id}"
)
if not unit_test_def.config.enabled:
# Ensure the unit test is disabled in the manifest
if unit_test_def.unique_id in manifest.unit_tests:
manifest.unit_tests.pop(unit_test_def.unique_id)
if unit_test_def.unique_id not in manifest.disabled:
manifest.add_disabled(manifest.files[unit_test_def.file_id], unit_test_def)
# The unit test is disabled, so we don't need to do any further processing (#10540)
return
target_model = manifest.nodes[target_model_id]
assert isinstance(target_model, ModelNode)

View File

@@ -3,14 +3,13 @@ import importlib
import importlib.util
import json
import os
import re
from importlib import metadata as importlib_metadata
from pathlib import Path
from typing import Iterator, List, Optional, Tuple
import requests
import dbt_common.semver as semver
from dbt.__version__ import version as __version_string
from dbt_common.ui import green, yellow
PYPI_VERSION_URL = "https://pypi.org/pypi/dbt-core/json"
@@ -233,16 +232,8 @@ def _resolve_version() -> str:
try:
return importlib_metadata.version("dbt-core")
except importlib_metadata.PackageNotFoundError:
pyproject_path = Path(__file__).resolve().parents[1] / "pyproject.toml"
if not pyproject_path.exists():
raise RuntimeError("Unable to locate pyproject.toml to determine dbt-core version")
text = pyproject_path.read_text(encoding="utf-8")
match = re.search(r'^version\s*=\s*"(?P<version>[^"]+)"', text, re.MULTILINE)
if match:
return match.group("version")
raise RuntimeError("Unable to determine dbt-core version from pyproject.toml")
# When running from source (not installed), use version from __version__.py
return __version_string
__version__ = _resolve_version()

206
core/hatch.toml Normal file
View File

@@ -0,0 +1,206 @@
[version]
path = "dbt/__version__.py"
[build.targets.wheel]
packages = ["dbt"]
only-packages = true
exclude = [
"**/*.md",
]
artifacts = [
"dbt/include/**/*.py",
"dbt/include/**/*.sql",
"dbt/include/**/*.yml",
"dbt/include/**/*.html",
"dbt/include/**/*.md",
"dbt/include/**/.gitkeep",
"dbt/include/**/.gitignore",
"dbt/task/docs/**/*.html",
"dbt/jsonschemas/**/*.json",
"dbt/py.typed",
# Directories without __init__.py (namespace packages)
"dbt/artifacts/resources/v1/**/*.py",
"dbt/artifacts/utils/**/*.py",
"dbt/event_time/**/*.py",
"dbt/docs/source/**/*.py",
"dbt/tests/util.py",
]
[build.targets.sdist]
include = [
"/dbt",
"/README.md",
]
[build.targets.sdist.force-include]
"dbt/task/docs/index.html" = "dbt/task/docs/index.html"
[envs.default]
# Python 3.10-3.11 required locally due to flake8==4.0.1 compatibility
# CI uses [envs.ci] which doesn't set python, allowing matrix testing
python = "3.11"
dependencies = [
# Git dependencies for development against main branches
"dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-adapters",
"dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter",
"dbt-common @ git+https://github.com/dbt-labs/dbt-common.git@main",
"dbt-postgres @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-postgres",
# Code quality
"pre-commit~=3.7.0",
"black>=24.3,<25.0",
"flake8==4.0.1", # requires python <3.12
"mypy==1.4.1", # update requires code fixes
"isort==5.13.2",
# Testing
"pytest>=7.0,<8.0",
"pytest-xdist~=3.6",
"pytest-csv~=3.0",
"pytest-cov",
"pytest-dotenv",
"pytest-mock",
"pytest-split",
"pytest-logbook~=1.2",
"logbook<1.9",
"flaky",
"freezegun>=1.5.1",
"hypothesis",
"mocker",
# Debugging
"ipdb",
"ddtrace==2.21.3",
# Documentation
"docutils",
"sphinx",
# Type stubs
"types-docutils",
"types-PyYAML",
"types-Jinja2",
"types-jsonschema",
"types-mock",
"types-protobuf>=5.0,<6.0",
"types-python-dateutil",
"types-pytz",
"types-requests",
"types-setuptools",
# Other
"pip-tools",
"protobuf>=6.0,<7.0",
]
[envs.default.scripts]
# Setup commands
setup = [
"pip install -e .",
"pre-commit install",
]
# Code quality commands
code-quality = "pre-commit run --all-files --show-diff-on-failure"
lint = [
"pre-commit run flake8-check --hook-stage manual --all-files",
"pre-commit run mypy-check --hook-stage manual --all-files",
]
flake8 = "pre-commit run flake8-check --hook-stage manual --all-files"
mypy = "pre-commit run mypy-check --hook-stage manual --all-files"
black = "pre-commit run black-check --hook-stage manual --all-files"
# Testing commands
unit-tests = "python -m pytest {args} ../tests/unit"
integration-tests = "python -m pytest -nauto {args} ../tests/functional"
integration-tests-fail-fast = "python -m pytest -x -nauto {args} ../tests/functional"
test = [
"python -m pytest ../tests/unit",
"pre-commit run black-check --hook-stage manual --all-files",
"pre-commit run flake8-check --hook-stage manual --all-files",
"pre-commit run mypy-check --hook-stage manual --all-files",
]
# Database setup
setup-db = [
"docker compose up -d database",
"bash ../scripts/setup_db.sh",
]
# Utility commands
clean = [
"rm -f .coverage",
"rm -f .coverage.*",
"rm -rf .eggs/",
"rm -rf build/",
"rm -rf dbt.egg-info/",
"rm -f dbt_project.yml",
"rm -rf dist/",
"find . -type f -name '*.pyc' -delete",
"find . -type d -name __pycache__ -exec rm -rf {} +",
]
json-schema = "python ../scripts/collect-artifact-schema.py --path ../schemas"
[envs.build]
python = "3.11"
detached = true
dependencies = [
"wheel",
"twine",
"check-wheel-contents",
]
[envs.build.scripts]
check-all = [
"- check-wheel",
"- check-sdist",
]
check-wheel = [
"twine check dist/*",
"find ./dist/dbt_core-*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
"pip freeze | grep dbt-core",
"dbt --version",
]
check-sdist = [
"check-wheel-contents dist/*.whl --ignore W007,W008",
"find ./dist/dbt_core-*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/",
"pip freeze | grep dbt-core",
"dbt --version",
]
# CI environment - isolated environment with test dependencies
[envs.ci]
dependencies = [
# Git dependencies for development against main branches
"dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-adapters",
"dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter",
"dbt-common @ git+https://github.com/dbt-labs/dbt-common.git@main",
"dbt-postgres @ git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-postgres",
# Testing
"pytest>=7.0,<8.0",
"pytest-cov",
"pytest-xdist~=3.6",
"pytest-csv~=3.0",
"pytest-dotenv",
"pytest-mock",
"pytest-split",
"ddtrace==2.21.3",
"flaky",
"freezegun>=1.5.1",
"hypothesis",
]
pre-install-commands = [
"pip install -e .",
]
[envs.ci.env-vars]
DBT_TEST_USER_1 = "dbt_test_user_1"
DBT_TEST_USER_2 = "dbt_test_user_2"
DBT_TEST_USER_3 = "dbt_test_user_3"
[envs.ci.scripts]
unit-tests = "python -m pytest --cov=dbt --cov-report=xml {args} ../tests/unit"
# Run as single command to avoid pre-install-commands running twice
integration-tests = """
python -m pytest --cov=dbt --cov-append --cov-report=xml {args} ../tests/functional -k "not tests/functional/graph_selection" && \
python -m pytest --cov=dbt --cov-append --cov-report=xml {args} ../tests/functional/graph_selection
"""
# Note: Python version matrix is handled by GitHub Actions CI, not hatch.
# This avoids running tests 4x per job. The CI sets up the Python version
# and hatch uses whatever Python is active.

View File

@@ -1,38 +1,12 @@
[tool.setuptools]
package-dir = {"" = "."}
include-package-data = true
zip-safe = false
[tool.setuptools.packages.find]
where = ["."]
include = [
"dbt",
"dbt.*",
]
# this needs to match MANIFEST.in for the wheels
[tool.setuptools.package-data]
"dbt" = [
"include/**/*.py",
"include/**/*.sql",
"include/**/*.yml",
"include/**/*.html",
"include/**/*.md",
"include/**/.gitkeep",
"include/**/.gitignore",
"task/docs/**/*.html",
"jsonschemas/**/*.json",
"py.typed",
]
[project]
name = "dbt-core"
version = "1.12.0a1"
dynamic = ["version"]
description = "With dbt, data analysts and engineers can build analytics the way engineers build applications."
readme = "README.md"
requires-python = ">=3.10"
license = "Apache-2.0"
license-files = ["License.md"] # License.md copied to core/ by build script even though it lives at the root by convention
license-files = { globs = ["LICENSE"] }
keywords = []
authors = [
{ name = "dbt Labs", email = "info@dbtlabs.com" },
@@ -80,9 +54,9 @@ dependencies = [
"dbt-extractor>=0.5.0,<=0.6",
"dbt-semantic-interfaces>=0.9.0,<0.10",
# Minor versions for these are expected to be backwards-compatible
"dbt-common>=1.27.0,<2.0",
"dbt-common>=1.37.0,<2.0",
"dbt-adapters>=1.15.5,<2.0",
"dbt-protos>=1.0.397,<2.0",
"dbt-protos>=1.0.405,<2.0",
"pydantic<3",
# ----
# Expect compatibility with all new versions of these packages, so lower bounds only.
@@ -102,9 +76,10 @@ Changelog = "https://github.com/dbt-labs/dbt-core/blob/main/CHANGELOG.md"
[project.scripts]
dbt = "dbt.cli.main:cli"
[tool.hatch.version]
path = "dbt/__version__.py"
[build-system]
requires = [
"setuptools>=61",
"wheel",
]
build-backend = "setuptools.build_meta"
requires = ["hatchling"]
build-backend = "hatchling.build"

View File

@@ -1,5 +0,0 @@
# The create_adapter_plugins script is being replaced by a new interactive cookiecutter scaffold
# that can be found https://github.com/dbt-labs/dbt-database-adapter-scaffold
print(
"This script has been deprecated, to create a new adapter please visit https://github.com/dbt-labs/dbt-database-adapter-scaffold"
)

View File

@@ -1,26 +0,0 @@
#!/usr/bin/env python
"""Legacy setuptools shim retained for compatibility with existing workflows. Will be removed in a future version."""
from setuptools import setup
# the user has a downlevel version of setuptools.
# ----
# dbt-core uses these packages deeply, throughout the codebase, and there have been breaking changes in past patch releases (even though these are major-version-one).
# Pin to the patch or minor version, and bump in each new minor version of dbt-core.
# ----
# dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility
# with major versions in each new minor version of dbt-core.
# ----
# These packages are major-version-0. Keep upper bounds on upcoming minor versions (which could have breaking changes)
# and check compatibility / bump in each new minor version of dbt-core.
# ----
# These are major-version-0 packages also maintained by dbt-labs.
# Accept patches but avoid automatically updating past a set minor version range.
# Minor versions for these are expected to be backwards-compatible
# ----
# Expect compatibility with all new versions of these packages, so lower bounds only.
# ----
if __name__ == "__main__":
setup()

View File

@@ -1,38 +0,0 @@
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-adapters
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter
git+https://github.com/dbt-labs/dbt-common.git@main
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-postgres
black==24.3.0
bumpversion
ddtrace==2.21.3
docutils
flake8==4.0.1
flaky
freezegun>=1.5.1
hypothesis
ipdb
isort==5.13.2
mypy==1.4.1
pip-tools
pre-commit
protobuf>=6.0,<7.0
pytest>=7.4,<8.0
pytest-cov
pytest-csv>=3.0,<4.0
pytest-dotenv
pytest-mock
pytest-split
pytest-xdist
sphinx
tox>=3.13
types-docutils
types-PyYAML
types-Jinja2
types-jsonschema
types-mock
types-protobuf>=5.0,<6.0
types-python-dateutil
types-pytz
types-requests
types-setuptools
mocker

View File

@@ -23,7 +23,7 @@ services:
# Run `make .env` to set $USER_ID and $GROUP_ID
USER_ID: ${USER_ID:-}
GROUP_ID: ${GROUP_ID:-}
command: "/root/.virtualenvs/dbt/bin/pytest"
command: "bash -c 'cd core && hatch run ci:unit-tests'"
environment:
POSTGRES_TEST_HOST: "database"
volumes:

View File

@@ -1,11 +0,0 @@
## ADRs
For any architectural/engineering decisions we make, we will create an ADR (Architectural Design Record) to keep track of what decision we made and why. This allows us to refer back to decisions in the future and see if the reasons we made a choice still hold true. This also allows for others to more easily understand the code. ADRs will follow this process:
- They will live in the repo, under a directory `docs/arch`
- They will be written in markdown
- They will follow the naming convention [`adr-NNN-<decision-title>.md`](http://adr-nnn.md/)
- `NNN` will just be a counter starting at `001` and will allow us to easily keep the records in chronological order.
- The common sections that each ADR should have are:
- Title, Context, Decision, Status, Consequences
- Use this article as a reference: [https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions](https://cognitect.com/blog/2011/11/15/documenting-architecture-decisions)

View File

@@ -1,35 +0,0 @@
# Performance Regression Framework
## Context
We want the ability to benchmark our performance over time with new changes going forward.
### Options
- Static Window: Compare the develop branch to fastest version and ensure it doesn't exceed a static window (i.e. time parse on develop and time parse on 0.20.latest and make sure it's not more than 5% slower)
- Pro: quick to run
- Pro: simple to implement
- Con: rerunning a failing test could get it to pass in a large number of changes.
- Con: several small regressions could press us up against the threshold requiring us to do unexpected additional performance work, or lower the threshold to get a release out.
- Variance-aware Testing: Run both the develop branch and our fastest version *many times* to collect a set of timing data. We can fail on a static window based on medians, confidence interval midpoints, and even variance magnitude.
- Pro: would catch more small performance regressions
- Con: would take much longer to run
- Con: Need to be very careful about making sure caching doesn't wreck the curve (or if it does, it wrecks the curve equally for all tests)
- Stateful Tracking: For example, the rust compiler team does some [bananas performance tracking](https://perf.rust-lang.org/). This option could be done in tandem with the above options, however it would require results be stored somewhere.
- Pro: we can graph our performance history and look really cool.
- Pro: Variance-aware testing would run in half the time since you can just reference old runs for comparison
- Con: state in tests sucks
- Con: longer to build
- Performance Profiling: Running a sampling-based profiler through a series of standardized test runs (test designed to hit as many/all of the code paths in the codebase) to determine if any particular function/class/other code has regressed in performance.
- Pro: easy to find the cause of the perf. regression
- Pro: should be able to run on a fairly small project size without losing much test resolution (a 5% change in a function should be evident with even a single case that runs that code path)
- Con: complex to build
- Con: compute intensive
- Con: requires stored results to compare against
## Decision
We decided to start with variance-aware testing with the ability to add stateful tracking by leveraging `hyperfine` which does all the variance work for us, and outputs clear json artifacts. Since we're running performance testing on a schedule it doesn't matter that as we add more tests it may take hours to run. The artifacts are all stored in the github action runs today, but could easily be changed to be sent somewhere in the action to track over time.
## Status
Completed
## Consequences
We now have the ability to more rigorously detect performance regressions, but we do not have a solid way to identify where that regression is coming from. Adding Performance Profiling capabilities will help with this, but for now just running it nightly should help us narrow it down to specific commits. As we add more performance tests, the testing matrix may take hours to run which consumes resources on GitHub Actions. Because performance testing is asynchronous, failures are easier to miss or ignore, and because it is non-deterministic it adds a non-trivial amount of complexity to our development process.

View File

@@ -1,34 +0,0 @@
# Structured Logging Arch
## Context
Consumers of dbt have been relying on log parsing well before this change. However, our logs were never optimized for programmatic consumption, nor were logs treated like a formal interface between dbt and users. dbt's logging strategy was changed explicitly to address these two realities.
### Options
#### How to structure the data
- Using a library like structlog to represent log data with structural types like dictionaries. This would allow us to easily add data to a log event's context at each call site and have structlog do all the string formatting and io work.
- Creating our own nominal type layer that describes each event in source. This allows event fields to be enforced statically via mypy across all call sites.
#### How to output the data
- Using structlog to output log lines regardless of if we used it to represent the data. The defaults for structlog are good, and it handles json vs text and formatting for us.
- Using the std lib logger to log our messages more manually. Easy to use, but does far less for us.
## Decision
#### How to structure the data
We decided to go with a custom nominal type layer even though this was going to be more work. This type layer centralizes our assumptions about what data each log event contains, and allows us to use mypy to enforce these centralized assumptions across the codebase. This is all for the purpose of treating logs like a formal interface between dbt and users. Here are two concrete, practical examples of how this pattern is used:
1. On the abstract superclass of all events, there are abstract methods and fields that each concrete class must implement such as `level_tag()` and `code`. If you make a new concrete event type without those, mypy will fail and tell you that you need them, preventing lost log lines, and json log events without a computer-friendly code.
2. On each concrete event, the fields we need to construct the message are explicitly in the source of the class. At every call site if you construct an event without all the necessary data, mypy will fail and tell you which fields you are missing.
Using mypy to enforce these assumptions is a step better than testing because we do not need to write tests to run through every branch of dbt that it could take. Because it is checked statically on every file, mypy will give us these guarantees as long as it is configured to run everywhere.
#### How to output the data
We decided to use the std lib logger because it was far more difficult than we expected to get structlog to work properly. Documentation was lacking, and reading the source code wasn't a quick way to learn. The std lib logger was used mostly out of a necessity, and because many of the pleasantries you get from using a log library we had already chosen to do explicitly with functions in our nominal typing layer. Swapping out the std lib logger in the future should be an easy task should we choose to do it.
## Status
Completed
## Consequences
Adding a new log event is more cumbersome than it was previously: instead of writing the message at the log callsite, you must create a new concrete class in the event types. This is more opaque for new contributors. The json serialization approach we are using via `asdict` is fragile and unoptimized and should be replaced.
All user-facing log messages now live in one file which makes the job of conforming them much simpler. Because they are all nominally typed separately, it opens up the possibility to have log documentation generated from the type hints as well as outputting our logs in multiple human languages if we want to translate our messages.

View File

@@ -1,68 +0,0 @@
# Python Model Arch
## Context
We are thinking of supporting `python` ([roadmap](https://github.com/dbt-labs/dbt-core/blob/main/docs/roadmap/2022-05-dbt-a-core-story.md#scene-3-python-language-dbt-models), [discussion](https://github.com/dbt-labs/dbt-core/discussions/5261)) as a language other than SQL in dbt-core. This would allow users to express transformation logic that is tricky to do in SQL and have more libraries available to them.
### Options
#### Where to run the code
- running it locally where we run dbt core.
- running it in the cloud providers' environment.
#### What are the guardrails dbt would enforce for the python model
- None, users can write whatever code they like.
- focusing on data transformation logic where each python model should have a model function that returns a database object for dbt to materialize.
#### Where should the implementation live
Two places we need to consider are `dbt-core` and each individual adapter code-base. What are the pieces needed? How do we decide what goes where?
#### Are we going to allow writing macros in python
- Not allowing it.
- Allowing certain Jinja templating
- Allow everything
## Decisions
#### Where to run the code
In the same idea of dbt is not your query engine, we don't want dbt to be your python runtime. Instead, we want dbt to focus on being the place to express transformation logic. So python model will be following the existing pattern of the SQL model(parse and compile user written logic and submit it to your computation engine).
#### What are the guardrails dbt would enforce for the python model
We want dbt to focus on transformation logic, so we opt for setting up some tools and guardrails for the python model to focus on doing data transformation.
1. A `dbt` object would have functions including `dbt.ref`, `dbt.source` function to reference other models and sources in the dbt project, the return of the function will be a dataframe of referenced resources.
1. Code in the python model node should include a model function that takes a `dbt` object as an argument, do the data transformation logic inside, and return a dataframe in the end. We think folks should load their data into dataframes using the `dbt.ref`, `dbt.source` provided over raw data references. We also think logic to write dataframe to database objects should live in materialization logic instead of transformation code.
1. That `dbt` object should also have an attribute called `dbt.config` to allow users to define configurations of the current python model like materialization logic, a specific version of python libraries, etc. This `dbt.config` object should also provide a clear access function for variables defined in project YAML. This way user can access arbitrary configuration at runtime.
#### Where should the implementation live
Logic in core should be universal and carry the opinions we have for the feature, this includes but is not limited to
1. parsing of python file in dbt-core to get the `ref`, `source`, and `config` information. This information is used to place the python model in the correct place in project DAG and generate the correct python code sent to compute engine.
1. `language` as a new top-level node property.
1. python template code that is not cloud provider-specific, this includes implementation for `dbt.ref`, `dbt.source`. We would use ast parser to parse out all of the `dbt.ref`, `dbt.source` inside python during parsing time, and generate what database resources those point to during compilation time. This should allow user to copy-paste the "compiled" code, and run it themselves against the data warehouse — just like with SQL models. An example of a definition for `dbt.ref` could look like this
```python
def ref(*args):
refs = {"my_sql_model": "DBT_TEST.DBT_SOMESCHEMA.my_sql_model"}
key = ".".join(args)
return load_df_function(refs[key])
```
1. functional tests for the python model, these tests are expected to be inherited in the adapter code to make sure intended functions are met.
1. Generalizing the names of properties (`sql`, `raw_sql`, `compiled_sql`) for a future where it's not all SQL.
1. implementation of the restrictions we have for the python model.
Computing engine specific logic should live in adapters, including but not limited to
- `load_df_function` of how to load a dataframe for a given database resource,
- `materialize` of how to save a dataframe to table or other materialization formats.
- some kind of `submit_python` function for submitting python code to compute engine.
- addition or modification of the `materialization` macro to materialize the python model
#### Are we going to allow writing macros in python
We don't know yet. We use macros in SQL models because it allows us to achieve what SQL can't do. But with python being a programming language, we don't see a strong need for macros in python yet. So we plan to strictly disable that in the user-written code in the beginning, and potentially add more as we hear from the community.
## Status
Implementing
# Consequences
Users would be able to write python transformation models in dbt and run them as part of their data transformation workflow.

View File

@@ -1,53 +0,0 @@
# Use of betterproto package for generating Python message classes
## Context
We are providing proto definitions for our structured logging messages, and as part of that we need to also have Python classes for use in our Python codebase
### Options, August 30, 2022
#### Google protobuf package
You can use the google protobuf package to generate Python "classes", using the protobuf compiler, "protoc" with the "--python_out" option.
* It's not readable. There are no identifiable classes in the output.
* A "class" is generated using a metaclass when it is used.
* You can't subclass the generated classes, which don't act much like Python objects
* Since you can't put defaults or methods of any kind in these classes, and you can't subclass them, they aren't very usable in Python.
* Generated classes are not easily importable
* Serialization is via external utilities.
* Mypy and flake8 totally fail so you have to exclude the generated files in the pre-commit config.
#### betterproto package
* It generates readable "dataclass" classes.
* You can subclass the generated classes. (Though you still can't add additional attributes. But if we really needed to we might be able to modify the source code to do so.)
* Integrates much more easily with our codebase.
* Serialization (to_dict and to_json) is built in.
* Mypy and flake8 work on generated files.
* Additional benefits listed: [betterproto](https://github.com/danielgtaylor/python-betterproto)
## Revisited, March 21, 2023
We are switching away from using betterproto because of the following reasons:
* betterproto only supports Optional fields in a beta release
* betterproto has had only beta releases for a few years
* betterproto doesn't support Struct, which we really need
* betterproto started changing our message names to be more "pythonic"
Steps taken to mitigate the drawbacks of Google protobuf from above:
* We are using a wrapping class around the logging events to enable a constructor that looks more like a Python constructor, as long as only keyword arguments are used.
* The generated file is skipped in the pre-commit config
* We can live with the awkward interfaces. It's just code.
Advantages of Google protobuf:
* Message can be constructed from a dictionary of all message values. With betterproto you had to pre-construct nested message objects, which kind of forced you to sprinkle generated message objects through the codebase.
* The Struct support works really well
* Type errors are caught much earlier and more consistently. Betterproto would accept fields of the wrong types, which was sometimes caught on serialization to a dictionary, and sometimes not until serialized to a binary string. Sometimes not at all.
Disadvantages of Google protobuf:
* You can't just set nested message objects, you have to use CopyFrom. Just code, again.
* If you try to stringify parts of the message (like in the constructed event message) it outputs in a bizarre "user friendly" format. Really bad for Struct, in particular.
* Python messages aren't really Python. You can't expect them to *act* like normal Python objects. So they are best kept isolated to the logging code only.
* As part of the not-really-Python, you can't use added classes to act like flags (Cache, NoFile, etc), since you can only use the bare generated message to construct other messages.

View File

Before

Width:  |  Height:  |  Size: 97 KiB

After

Width:  |  Height:  |  Size: 97 KiB

View File

Before

Width:  |  Height:  |  Size: 7.1 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

View File

Before

Width:  |  Height:  |  Size: 138 KiB

After

Width:  |  Height:  |  Size: 138 KiB

View File

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

@@ -1 +0,0 @@
-e ./core

View File

@@ -1,3 +0,0 @@
The events outlined here exist to support "very very old versions of dbt-core, which expected to look directly at the HEAD branch of this github repo to find validation schemas".
Eventually these should go away (see https://github.com/dbt-labs/dbt-core/issues/7228)

View File

@@ -1,10 +0,0 @@
{
"type": "object",
"title": "invocation_env",
"description": "DBT invocation environment type",
"properties": {
"environment": {
"type": "string"
}
}
}

View File

@@ -1,43 +0,0 @@
{
"type": "object",
"title": "invocation",
"description": "Schema for a dbt invocation",
"properties": {
"project_id": {
"type": "string"
},
"user_id": {
"type": "string"
},
"invocation_id": {
"type": "string"
},
"command": {
"type": "string"
},
"command_options": {
"type": "string"
},
"progress": {
"type": "string",
"enum": ["start", "end"]
},
"version": {
"type": "string"
},
"remote_ip": {
"type": "string"
},
"run_type": {
"type": "string",
"enum": ["dry", "test", "regular"]
},
"result_type": {
"type": "string",
"enum": ["ok", "error"]
},
"result": {
"type": "string"
}
}
}

View File

@@ -1,16 +0,0 @@
{
"type": "object",
"title": "platform",
"description": "Schema for a dbt user's platform",
"properties": {
"platform": {
"type": "string"
},
"python": {
"type": "string"
},
"python_version": {
"type": "string"
}
}
}

View File

@@ -1,35 +0,0 @@
{
"type": "object",
"title": "run_model",
"description": "Schema for the execution of a model",
"properties": {
"index": {
"type": "number"
},
"total": {
"type": "number"
},
"execution_time": {
"type": "number",
"multiple_of": 0.01
},
"run_status": {
"type": "string"
},
"run_skipped": {
"type": "string"
},
"run_error": {
"type": "string"
},
"model_materialization": {
"type": "string"
},
"model_id": {
"type": "string"
},
"hashed_contents": {
"type": "string"
}
}
}

View File

@@ -1,118 +0,0 @@
# Performance Regression Testing
## Attention!
PLEASE READ THIS README IN THE MAIN BRANCH
The performance runner is always pulled from main regardless of the version being modeled or sampled. If you are not in the main branch, this information may be stale.
## Description
This test suite samples the performance characteristics of individual commits against performance models for prior releases. Performance is measured in project-command pairs which are assumed to conform to a normal distribution. The sampling and comparison is efficient enough to run against PRs.
This collection of projects and commands should expand over time to reflect user feedback about poorly performing projects to protect against poor performance in these scenarios in future versions.
Here are all the components of the testing module:
- dbt project setups that are known performance bottlenecks which you can find in `/performance/projects/`, and a runner written in Rust that runs specific dbt commands on each of the projects.
- Performance characteristics called "baselines" from released dbt versions in `/performance/baselines/`. Each branch will only have the baselines for its ancestors because when we compare samples, we compare against the latest baseline available in the branch.
- A GitHub action for modeling the performance distribution for a new release: `/.github/workflows/model_performance.yml`.
- A GitHub action for sampling performance of dbt at your commit and comparing it against a previous release: `/.github/workflows/sample_performance.yml`.
At this time, the biggest risk in the design of this project is how to account for the natural variation of GitHub Action runs. Typically, performance work is done on dedicated hardware to eliminate this factor. However, there are ways to integrate the variation in observation tools if it can be measured.
## Adding Test Scenarios
A clear process for maintainers and community members to add new performance testing targets will exist after the next stage of the test suite is complete. For details, see #4768.
## Investigating Regressions
If your commit has failed one of the performance regression tests, it does not necessarily mean your commit has a performance regression. However, the observed runtime value was so much slower than the expected value that it was unlikely to be random noise. If it is not due to random noise, this commit contains the code that is causing this performance regression. However, it may not be the commit that introduced that code. That code may have been introduced in the commit before even if it passed due to natural variation in sampling. When investigating a performance regression, start with the failing commit and work your way backwards.
Here's an example of how this could happen:
```
Commit
A <- last release
B
C <- perf regression
D
E
F <- the first failing commit
```
- Commit A is measured to have an expected value for one performance metric of 30 seconds with a standard deviation of 0.5 seconds.
- Commit B doesn't introduce a performance regression and passes the performance regression tests.
- Commit C introduces a performance regression such that the new expected value of the metric is 32 seconds with a standard deviation still at 0.5 seconds, but we don't know this because we don't estimate the whole performance distribution on every commit because that is far too much work to run on every commit. It passes the performance regression test because we happened to sample a value of 31 seconds which is within our threshold for the original model. It's also only 2 standard deviations away from the actual performance model of commit C so even though it's not going to be a super common situation, it is expected to happen sometimes.
- Commit D samples a value of 31.4 seconds and passes
- Commit E samples a value of 31.2 seconds and passes
- Commit F samples a value of 32.9 seconds and fails
Because these performance regression tests are non-deterministic, it is frequently going to be possible to rerun the test on a failing commit and get it to pass. The more often we do this, the farther down the commit history we will be punting detection.
If your PR is against `main` your commits will be compared against the latest baseline measurement found in `performance/baselines`. If this commit needs to be backported, that PR will be against the `.latest` branch and will also compare against the latest baseline measurement found in `performance/baselines` in that branch. These two versions may be the same or they may be different. For example, if the latest version of dbt is v1.99.0, the performance sample of your PR against main will compare against the baseline for v1.99.0. When those commits are backported to `1.98.latest` those commits will be compared against the baseline for v1.98.6 (or whatever the latest is at that time). Even if the compared baseline is the same, a different sample is taken for each PR. In this case, even though it should be rare, it is possible for a performance regression to be detected in one of the two PRs even with the same baseline due to variation in sampling.
## The Statistics
Particle physicists need to be confident in declaring new discoveries, snack manufacturers need to be sure each individual item is within the regulated margin of error for nutrition facts, and weight-rated climbing gear needs to be produced so you can trust your life to every unit that comes off the line. All of these use cases use the same kind of math to meet their needs: sigma-based p-values. This section will peel apart that math with the help of a physicist and walk through how we apply this approach to performance regression testing in this test suite.
You are likely familiar with forming a hypothesis of the form "A and B are correlated" which is known as _the research hypothesis_. Additionally, it follows that the hypothesis "A and B are not correlated" is relevant and is known as _the null hypothesis_. When looking at data, we commonly use a _p-value_ to determine the significance of the data. Formally, a _p-value_ is the probability of obtaining data at least as extreme as the ones observed, if the null hypothesis is true. To refine this definition, the experimental particle physicist [Dr. Tommaso Dorigo](https://userswww.pd.infn.it/~dorigo/#about) has an excellent [glossary](https://www.science20.com/quantum_diaries_survivor/fundamental_glossary_higgs_broadcast-85365) of these terms that helps clarify: "'Extreme' is quite tricky instead: it depends on what is your 'alternate hypothesis' of reference, and what kind of departure it would produce on the studied statistic derived from the data. So 'extreme' will mean 'departing from the typical values expected for the null hypothesis, toward the values expected from the alternate hypothesis.'" In the context of performance regression testing, our research hypothesis is that "after commit A, the codebase includes a performance regression" which means we expect the runtime of our measured processes to be _slower_, not faster than the expected value.
Given this definition of p-value, we need to explicitly call out the common tendency to apply _probability inversion_ to our observations. To quote [Dr. Tommaso Dorigo](https://www.science20.com/quantum_diaries_survivor/fundamental_glossary_higgs_broadcast-85365) again, "If your ability on the long jump puts you in the 99.99% percentile, that does not mean that you are a kangaroo, and neither can one infer that the probability that you belong to the human race is 0.01%." Using our previously defined terms, the p-value is _not_ the probability that the null hypothesis _is true_.
This brings us to calculating sigma values. Sigma refers to the standard deviation of a statistical model, which is used as a measurement of how far away an observed value is from the expected value. When we say that we have a "3 sigma result" we are saying that if the null hypothesis is true, this is a particularly unlikely observation—not that the null hypothesis is false. Exactly how unlikely depends on what the expected values from our research hypothesis are. In the context of performance regression testing, if the null hypothesis is false, we are expecting the results to be _slower_ than the expected value not _slower or faster_. Looking at a normal distribution below, we can see that we only care about one _half_ of the distribution: the half where the values are slower than the expected value. This means that when we're calculating the p-value we are not including both sides of the normal distribution.
![normal distribution](./images/normal.svg)
Because of this, the following table describes the significance of each sigma level for our _one-sided_ hypothesis:
| σ | p-value | scientific significance |
| --- | -------------- | ----------------------- |
| 1 σ | 1 in 6 | |
| 2 σ | 1 in 44 | |
| 3 σ | 1 in 741 | evidence |
| 4 σ | 1 in 31,574 | |
| 5 σ | 1 in 3,486,914 | discovery |
When detecting performance regressions that trigger alerts, block PRs, or delay releases we want to be conservative enough that detections are infrequently triggered by noise, but not so conservative as to miss most actual regressions. This test suite uses a 3 sigma standard so that only about 1 in every 740 runs is expected to fail the performance regression test suite due to expected variance in our measurements.
In practice, the number of performance regression failures due to random noise will be higher because we are not incorporating the variance of the tools we use to measure, namely GHA.
### Concrete Example: Performance Regression Detection
The following example data was collected by running the code in this repository in Github Actions.
In dbt v1.0.3, we have the following mean and standard deviation when parsing a dbt project with 2000 models:
μ (mean): 41.22<br/>
σ (stddev): 0.2525<br/>
The 2-sided 3 sigma range can be calculated with these two values via:
x < μ - 3 σ or x > μ + 3 σ<br/>
x < 41.22 - 3 * 0.2525 or x > 41.22 + 3 * 0.2525 <br/>
x < 40.46 or x > 41.98<br/>
It follows that the 1-sided 3 sigma range for performance regressions is just:<br/>
x > 41.98
If, when we sample a single `dbt parse` of the same project with a commit slated to go into dbt v1.0.4, we observe a 42s parse time, then this observation is so unlikely in the absence of a code-induced performance regression that we should investigate whether there is a performance regression in any of the commits between this failure and the commit where the initial distribution was measured.
Observations with 3 sigma significance that are _not_ performance regressions could be due to observing unlikely values (roughly 1 in every 740 observations), or variations in the instruments we use to take these measurements, such as GitHub Actions. At this time we do not measure the variation in the instruments we use to account for these in our calculations, which means failures due to random noise are more likely than they would be if we did take them into account.
### Concrete Example: Performance Modeling
Once a new dbt version is released (excluding pre-releases), the performance characteristics of that released version need to be measured. In this repository this measurement is referred to as a baseline.
After dbt v1.0.99 is released, a github action running from `main`, for the latest version of that action, takes the following steps:
- Checks out main for the latest performance runner
- pip installs dbt v1.0.99
- builds the runner if it's not already in the github actions cache
- uses the performance runner model sub command with `./runner model`.
- The model subcommand calls hyperfine to run all of the project-command pairs a large number of times (maybe 20 or so) and save the hyperfine outputs to files in `performance/baselines/1.0.99/` one file per command-project pair.
- The action opens two PRs with these files: one against `main` and one against `1.0.latest` so that future PRs against these branches will detect regressions against the performance characteristics of dbt v1.0.99 instead of v1.0.98.
- The release driver for dbt v1.0.99 reviews and merges these PRs which is the sole deliverable of the performance modeling work.
## Future work
- pin commands to projects by reading commands from a file defined in the project.
- add a postgres warehouse to run `dbt compile` and `dbt run` commands
- add more projects to test different configurations that have been known performance bottlenecks
- Account for github action variation: Either measure it, or eliminate it. To measure it we could set up another action that periodically samples the same version of dbt and use a 7 day rolling variation. To eliminate it we could run the action using something like [act](https://github.com/nektos/act) on dedicated hardware.
- build in a git-bisect run to automatically identify the commits that caused a performance regression by modeling each commit's expected value for the failing metric. Running this automatically, or even providing a script to do this locally would be useful.

View File

@@ -1 +0,0 @@
# placeholder for baselines directory

View File

@@ -1 +0,0 @@
{"version":"1.2.0","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-09T13:49:21.773314639Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":44.19299478025,"stddev":0.2429047068802047,"median":44.17483035975,"user":43.4559033,"system":0.5913923200000001,"min":43.81193651175,"max":44.61466355675,"times":[44.597056272749995,43.96855886975,43.90405755675,44.14156308475,44.49939515775,44.11553658675,44.30173547275,43.932534850749995,43.843978513749995,44.08611205475,43.99133546975,44.39880287075,44.20809763475,44.10553540675,43.81193651175,44.24880915975,44.408731260749995,44.61466355675,44.31538149475,44.36607381875]}}

View File

@@ -1 +0,0 @@
{"version":"1.3.0","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-05T21:26:14.178981105Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":57.34703829679,"stddev":1.264070714183875,"median":57.16122855003999,"user":56.124171495,"system":0.6879409899999999,"min":56.03876437454,"max":62.15960342254,"times":[56.45744564454,56.27775436354,56.50617413654,57.34027474654,57.38757627154,57.17093026654,56.29133183054,56.89527107354,57.48466258854,56.87484084654,57.14306217354,57.13537045454,58.00688797954,57.15152683354,57.65667721054,56.03876437454,57.68217591654,58.03524921154,62.15960342254,57.24518659054]}}

View File

@@ -1 +0,0 @@
{"version":"1.3.4","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-05T21:21:13.216166358Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":43.251824134715,"stddev":0.2626902769638351,"median":43.195683199465,"user":42.82592822,"system":0.444670655,"min":42.988474644965,"max":44.268850566965,"times":[43.117288670965,43.276664016965,44.268850566965,43.175714899965,43.069990564965,43.353031152965,43.064902203965,43.104385867965,43.228237677965,43.151709868965,43.410496816965,43.139105498965,43.112643799965,43.19391977696501,43.303759563965,43.312242193965,43.197446621965,43.297804568965,42.988474644965,43.269813715965]}}

View File

@@ -1 +0,0 @@
{"version":"1.4.0","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-05T16:07:45.035878166Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":53.44691701517499,"stddev":1.9217109918352029,"median":54.31170254667501,"user":52.633288745,"system":0.636774385,"min":49.603911921675,"max":55.743179437675,"times":[55.021354517675,54.25164864567501,54.975722432675,52.635067164675,53.571658032675,51.382873180675,50.043912339675,49.603911921675,51.132099650675,54.615839302675,52.565473620675,51.152761771675,52.459746128675,55.743179437675,54.936982552675005,54.37175644767501,54.852100134675,55.048404930675,55.185989433675005,55.387858656675]}}

View File

@@ -1 +0,0 @@
{"version":"1.4.1","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-05T21:23:11.574110714Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":51.81799889823499,"stddev":0.49021827459557155,"median":51.877185231885,"user":50.937133405,"system":0.66050657,"min":50.713426685384995,"max":52.451290474385,"times":[51.868264556385,51.967490942385,52.321507218385,51.886105907385,52.451290474385,52.283930937385,51.818989812385,51.978303421385,51.213362656385,50.713426685384995,52.258454610385,51.758877730384995,51.082508232384995,51.128473688385,51.631421367384995,52.194084467385,52.240100726384995,51.64952270338499,51.49970049638499,52.414161330385]}}

View File

@@ -1 +0,0 @@
{"version":"1.4.6","metric":{"name":"parse","project_name":"01_2000_simple_models"},"ts":"2023-05-05T21:31:07.688350571Z","measurement":{"command":"dbt parse --no-version-check --profiles-dir ../../project_config/","mean":71.63662348534498,"stddev":1.0486666901040516,"median":71.48043032754501,"user":70.594864785,"system":0.7236668199999998,"min":70.179068043545,"max":73.74777047454499,"times":[70.885587350545,71.733729563545,71.902222862545,70.362755346545,70.179068043545,70.902001253545,72.798824228545,73.209881293545,70.520832511545,71.143232155545,71.623572279545,71.337288375545,71.763221403545,70.426712498545,70.82376365454499,72.50315140754499,71.161477365545,72.747252973545,73.74777047454499,72.96012466454499]}}

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 42 KiB

View File

@@ -1 +0,0 @@
id: 5d0c160e-f817-4b77-bce3-ffb2e37f0c9b

View File

@@ -1,12 +0,0 @@
default:
target: dev
outputs:
dev:
type: postgres
host: localhost
user: dummy
password: dummy_password
port: 5432
dbname: dummy
schema: dummy
threads: 4

View File

@@ -1,13 +0,0 @@
name: 'my_new_package'
version: 1.0.0
config-version: 2
profile: 'default'
model-paths: ["models"]
target-path: "target"
clean-targets:
- "target"
- "dbt_modules"
models:
materialized: view

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_0
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_1
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_2
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_3
version: 2

View File

@@ -1,3 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_4
version: 2

View File

@@ -1,5 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_2') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_5
version: 2

View File

@@ -1,5 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_3') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_6
version: 2

View File

@@ -1,7 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_3') }}
union all
select * from {{ ref('node_6') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_7
version: 2

View File

@@ -1,7 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_3') }}
union all
select * from {{ ref('node_6') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_8
version: 2

View File

@@ -1,9 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_3') }}
union all
select * from {{ ref('node_6') }}
union all
select * from {{ ref('node_7') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_9
version: 2

View File

@@ -1,9 +0,0 @@
select 1 as id
union all
select * from {{ ref('node_0') }}
union all
select * from {{ ref('node_3') }}
union all
select * from {{ ref('node_6') }}
union all
select * from {{ ref('node_8') }}

View File

@@ -1,11 +0,0 @@
models:
- columns:
- name: id
tests:
- unique
- not_null
- relationships:
field: id
to: ref('node_0')
name: node_10
version: 2

Some files were not shown because too many files have changed in this diff Show More