Mirror of https://github.com/dbt-labs/dbt-core
Synced 2025-12-17 19:31:34 +00:00

Compare commits: 222 commits (update-ind ... main)
Commit SHAs:

1b7d9b5704, c3d87b89fb, 0f084e16ca, 3464be7f70, 407f6caa1c, ad575ec699, f582ac2488, f5f0735d00, 3abf575fa6, a42303c3af,
6fccfe84ea, fd6ec71dab, ae957599e1, f080346227, 2a75dd4683, 945539e3ae, 84230ce333, 35c09203ad, 1625eb059a, 2c43af897d,
6e1f64f8b4, e9a2b548cb, 89caa33fb4, 30b8a92e38, b95f7a7f2c, e451a371e6, 81067d4fc4, 3198ce4809, 0c51985c83, e26af57989,
bdf28d7eff, 289d2dd932, 8a17a0d7e7, 8c6bec4fb5, 7f5abdc565, f714e84282, 7f92c6e003, 8de0229a04, dd77210756, 8df5c96f3d,
6b5db1796f, 3224589fe7, b71ceb3166, 4d4b05effc, 316ecfca28, d07bfda9df, 8ae689c674, bdb79e8626, f7b7935a97, 3d96b4e36c,
7920b0e71d, a0674db840, ba6c7baf1d, 8be063502b, 78c05718c5, d18f50bbb8, ffa75ca9ff, 8f847167fa, cd6bb9e782, ef9abe6c06,
40c350ff21, c7d8693f70, 6743e32574, f6cdacc61e, 5db0b81da1, fc8eb820aa, fc83f5edfa, 8248d1eb53, 6b9c1da1ae, 7940ad5c78,
3ec8fa79bd, 396cf2d683, 87b1143a62, 75a09621cd, 5e9f1b515f, 25a68a990c, a86e2b4ffc, 94917432f9, d1857b39ca, 2ff3f20863,
5e3d418264, 5d32aa8b62, d8b1bf53f7, 1076352293, 1fe9c1bbfe, 41e4836c0f, b590045b9f, 1fd4d2eae6, ac66f91351, 359a2c0cc5,
bbdb98fa5d, a8d4ba2b4a, 09e973d24a, 730e40a867, a1e4753020, 3ac20ce7a8, aa23af98e5, 46da967115, db694731c9, 7016cd3085,
9ca10fbfd9, 3308a4365e, f8bfd32ed6, 3e437a6734, 9e633f6178, d182d06644, 054c6fde37, 4c326e40b5, 8fe5ea1ee7, 16f5023f4d,
c6b8f7e595, 77aeb3ea68, 1e20772d33, 8ce2c46a2f, aeaaedcaa1, 6c111f2e31, 139b9ac54f, cc8541c05f, ab500a9709, 1d3d315249,
b35ad46e3f, c28cb92af5, b56d96df5e, 37d382c8e7, 9b7f4ff842, 555ff8091f, 98fddcf54f, d652359c61, f7d21e012e, e1fa461186,
1153597970, 09f9febc25, 22181409f6, f25a474f75, 3c55806203, bba020fcc0, 84eb0ff672, 3695698e22, 9ca1bc5b4c, 5f66678f6d,
63262e93cb, 374412af53, 47848b8ea8, 3d09872a56, dfa7d06526, 7f57dd5a30, 56bfbeaedd, 1dd26e79af, 86223609dd, 21a46332f1,
ff2726c3b5, 014444dc18, 25c2042dc9, 0a160fc27a, c598741262, f9c2b9398f, cab6dabbc7, e1621ebc54, cd90d4493c, 560d151dcd,
229c537748, 79ad0a3243, c668846404, c4958de166, 33161a3035, 471b816dcd, bef2d20c21, 2a26fabfdf, 4c7d922a6d, b03291548a,
a7af3b3831, 6e4564ab05, 1aeff2c58f, 601fee0d5f, 88b8b10df1, 4ea0e1007c, a309283a7c, b10fa79ae8, 37e2725038, 37fd299ad0,
a94027acea, b59c9075e2, c215697a02, d936a630c1, 11ee2b9c42, 64c59476f4, 2bae05b8ed, ca163c3d6e, 9a796aa202, 51ff85bb2d,
d389ff1450, 4415731da4, 0fdc83af9d, 71a8a41104, da19d7ba9f, 1475abb1cb, 27b2f965dd, 100352d6b4, 8ee8b2560a, d4a6482091,
8639290108, e699f5d042, e977b3eee5, c5be8e2a93, bff116dbed, 4df120e40e, e53420c1d0, 88ccc8a447, a98059967d, b680c7ae95,
a677abd5e8, 8c850b58cb, a34267f54b, 155482851a, 81386a7a43, d8e38c1a1d, 3e37d77780, e0783c2922, c2d4643f9d, 84456f50f6,
fb10bb4aea, 366d4ad04a
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.9.0a1
current_version = 1.10.0a1
parse = (?P<major>[\d]+) # major version number
\.(?P<minor>[\d]+) # minor version number
\.(?P<patch>[\d]+) # patch version number
@@ -1,6 +0,0 @@
kind: Dependencies
body: Remove logbook dependency
time: 2024-05-09T09:37:17.745129-05:00
custom:
Author: emmyoop
Issue: "8027"

@@ -1,6 +0,0 @@
kind: Features
body: serialize inferred primary key
time: 2024-05-06T17:56:42.757673-05:00
custom:
Author: dave-connors-3
Issue: "9824"

@@ -1,6 +0,0 @@
kind: Features
body: 'Add unit_test: selection method'
time: 2024-05-07T16:27:17.047585-04:00
custom:
Author: michelleark
Issue: "10053"

.changes/unreleased/Features-20241104-120053.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
kind: Features
body: Add new hard_deletes="new_record" mode for snapshots.
time: 2024-11-04T12:00:53.95191-05:00
custom:
Author: peterallenwebb
Issue: "10235"

.changes/unreleased/Features-20241121-125630.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
kind: Features
body: Add `batch` context object to model jinja context
time: 2024-11-21T12:56:30.715473-06:00
custom:
Author: QMalcolm
Issue: "11025"

@@ -1,6 +0,0 @@
kind: Fixes
body: Remove unused check_new method
time: 2023-06-01T20:41:57.556342+02:00
custom:
Author: kevinneville
Issue: "7586"

@@ -1,7 +0,0 @@
kind: Fixes
body: 'Restore previous behavior for --favor-state: only favor defer_relation if not
selected in current command"'
time: 2024-05-08T15:11:27.510912+02:00
custom:
Author: jtcohen6
Issue: "10107"

@@ -1,6 +0,0 @@
kind: Fixes
body: Unit test fixture (csv) returns null for empty value
time: 2024-05-09T09:14:11.772709-04:00
custom:
Author: michelleark
Issue: "9881"

@@ -1,7 +0,0 @@
kind: Fixes
body: Fix json format log and --quiet for ls and jinja print by converting print call
to fire events
time: 2024-05-16T15:39:13.896723-07:00
custom:
Author: ChenyuLInx
Issue: "8756"

@@ -1,6 +0,0 @@
kind: Fixes
body: Add resource type to saved_query
time: 2024-05-16T22:35:10.287514-07:00
custom:
Author: ChenyuLInx
Issue: "10168"

.changes/unreleased/Fixes-20240822-122132.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
kind: Fixes
body: dbt retry does not respect --threads
time: 2024-08-22T12:21:32.358066+05:30
custom:
Author: donjin-master
Issue: "10584"

.changes/unreleased/Fixes-20241121-181739.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
kind: Fixes
body: Catch DbtRuntimeError for hooks
time: 2024-11-21T18:17:39.753235Z
custom:
Author: aranke
Issue: "11012"

@@ -1,6 +0,0 @@
kind: Security
body: Explicitly bind to localhost in docs serve
time: 2024-05-22T09:45:40.748185-04:00
custom:
Author: ChenyuLInx michelleark
Issue: "10209"

@@ -1,6 +0,0 @@
kind: Under the Hood
body: Clear error message for Private package in dbt-core
time: 2024-05-02T15:44:30.713097-07:00
custom:
Author: ChenyuLInx
Issue: "10083"

@@ -1,6 +0,0 @@
kind: Under the Hood
body: Enable use of context in serialization
time: 2024-05-06T14:55:11.1812-04:00
custom:
Author: gshank
Issue: "10093"

@@ -1,6 +0,0 @@
kind: Under the Hood
body: Make RSS high water mark measurement more accurate on Linux
time: 2024-05-19T15:59:46.700842315-04:00
custom:
Author: peterallenwebb
Issue: "10177"
.flake8 (1 changed line)
@@ -7,6 +7,7 @@ ignore =
W503 # makes Flake8 work like black
W504
E203 # makes Flake8 work like black
E704 # makes Flake8 work like black
E741
E501 # long line checking is done in black
exclude = test/
.github/ISSUE_TEMPLATE/code-docs.yml (new file, 18 lines)
@@ -0,0 +1,18 @@
name: 📄 Code docs
description: Report an issue for markdown files within this repo, such as README, ARCHITECTURE, etc.
title: "[Code docs] <title>"
labels: ["triage"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this code docs issue!
- type: textarea
attributes:
label: Please describe the issue and your proposals.
description: |
Links? References? Anything that will give us more context about the issue you are encountering!

Tip: You can attach images by clicking this area to highlight it and then dragging files in.
validations:
required: false
.github/ISSUE_TEMPLATE/config.yml (3 changed lines)
@@ -1,5 +1,8 @@
blank_issues_enabled: false
contact_links:
- name: Documentation
url: https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose
about: Problems and issues with dbt product documentation hosted on docs.getdbt.com. Issues for markdown files within this repo, such as README, should be opened using the "Code docs" template.
- name: Ask the community for help
url: https://github.com/dbt-labs/docs.getdbt.com/discussions
about: Need help troubleshooting? Check out our guide on how to ask
.github/actions/setup-postgres-linux/action.yml (11 changed lines)
@@ -5,6 +5,15 @@ runs:
steps:
- shell: bash
run: |
sudo systemctl start postgresql.service
sudo apt-get --purge remove postgresql postgresql-*
sudo apt update -y
sudo apt install gnupg2 wget vim -y
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc|sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg
sudo apt update -y
sudo apt install postgresql-16
sudo apt-get -y install postgresql postgresql-contrib
sudo systemctl start postgresql
sudo systemctl enable postgresql
pg_isready
sudo -u postgres bash ${{ github.action_path }}/setup_db.sh
@@ -5,7 +5,9 @@ runs:
steps:
- shell: bash
run: |
brew services start postgresql
brew install postgresql@16
brew link postgresql@16 --force
brew services start postgresql@16
echo "Check PostgreSQL service is running"
i=10
COMMAND='pg_isready'
@@ -5,8 +5,22 @@ runs:
steps:
- shell: pwsh
run: |
$pgService = Get-Service -Name postgresql*
Write-Host -Object "Installing PostgreSQL 16 as windows service..."
$installerArgs = @("--install_runtimes 0", "--superpassword root", "--enable_acledit 1", "--unattendedmodeui none", "--mode unattended")
$filePath = Invoke-DownloadWithRetry -Url "https://get.enterprisedb.com/postgresql/postgresql-16.1-1-windows-x64.exe" -Path "$env:PGROOT/postgresql-16.1-1-windows-x64.exe"
Start-Process -FilePath $filePath -ArgumentList $installerArgs -Wait -PassThru

Write-Host -Object "Validating PostgreSQL 16 Install..."
Get-Service -Name postgresql*
$pgReady = Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
$exitCode = $pgReady.ExitCode
if ($exitCode -ne 0) {
Write-Host -Object "PostgreSQL is not ready. Exitcode: $exitCode"
exit $exitCode
}

Write-Host -Object "Starting PostgreSQL 16 Service..."
$pgService = Get-Service -Name postgresql-x64-16
Set-Service -InputObject $pgService -Status running -StartupType automatic
Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
$env:Path += ";$env:PGBIN"
bash ${{ github.action_path }}/setup_db.sh
.github/pull_request_template.md (14 changed lines)
@@ -1,7 +1,7 @@
resolves #
Resolves #

<!---
Include the number of the issue addressed by this PR above if applicable.
Include the number of the issue addressed by this PR above, if applicable.
PRs for code changes without an associated issue *will not be merged*.
See CONTRIBUTING.md for more information.

@@ -26,8 +26,8 @@ resolves #

### Checklist

- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me
- [ ] I have run this code in development and it appears to resolve the stated issue
- [ ] This PR includes tests, or tests are not required/relevant for this PR
- [ ] This PR has no interface changes (e.g. macros, cli, logs, json artifacts, config files, adapter interface, etc) or this PR has already received feedback and approval from Product or DX
- [ ] This PR includes [type annotations](https://docs.python.org/3/library/typing.html) for new and modified functions
- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me.
- [ ] I have run this code in development, and it appears to resolve the stated issue.
- [ ] This PR includes tests, or tests are not required or relevant for this PR.
- [ ] This PR has no interface changes (e.g., macros, CLI, logs, JSON artifacts, config files, adapter interface, etc.) or this PR has already received feedback and approval from Product or DX.
- [ ] This PR includes [type annotations](https://docs.python.org/3/library/typing.html) for new and modified functions.
.github/workflows/check-artifact-changes.yml (2 changed lines)
@@ -32,7 +32,7 @@ jobs:
run: |
echo "CI failure: Artifact changes checked in core/dbt/artifacts directory."
echo "Files changed: ${{ steps.check_artifact_changes.outputs.artifacts_changed_files }}"
echo "To bypass this check, confirm that the change is not breaking (https://github.com/dbt-labs/dbt-core/blob/main/core/dbt/artifacts/README.md#breaking-changes) and add the 'artifact_minor_upgrade' label to the PR."
echo "To bypass this check, confirm that the change is not breaking (https://github.com/dbt-labs/dbt-core/blob/main/core/dbt/artifacts/README.md#breaking-changes) and add the 'artifact_minor_upgrade' label to the PR. Modifications and additions to all fields require updates to https://github.com/dbt-labs/dbt-jsonschema."
exit 1

- name: CI check passed
.github/workflows/docs-issue.yml (4 changed lines)
@@ -36,6 +36,6 @@ jobs:
uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
with:
issue_repository: "dbt-labs/docs.getdbt.com"
issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
issue_title: "[Core] Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated.\n Originating from this issue: https://github.com/dbt-labs/dbt-core/issues/${{ github.event.issue.number }}"
secrets: inherit
.github/workflows/main.yml (11 changed lines)
@@ -52,13 +52,14 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.8'
python-version: '3.9'

- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
python -m pip --version
make dev
make dev_req
mypy --version
dbt --version

@@ -74,7 +75,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]

env:
TOXENV: "unit"

@@ -139,7 +140,7 @@ jobs:
- name: generate include
id: generate-include
run: |
INCLUDE=('"python-version":"3.8","os":"windows-latest"' '"python-version":"3.8","os":"macos-12"' )
INCLUDE=('"python-version":"3.9","os":"windows-latest"' '"python-version":"3.9","os":"macos-14"' )
INCLUDE_GROUPS="["
for include in ${INCLUDE[@]}; do
for group in $(seq 1 ${{ env.PYTHON_INTEGRATION_TEST_WORKERS }}); do

@@ -161,7 +162,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
os: [ubuntu-20.04]
split-group: ${{ fromJson(needs.integration-metadata.outputs.split-groups) }}
include: ${{ fromJson(needs.integration-metadata.outputs.include) }}

@@ -263,7 +264,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.8'
python-version: '3.9'

- name: Install python dependencies
run: |
.github/workflows/model_performance.yml (2 changed lines)
@@ -150,7 +150,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
python-version: "3.9"

- name: Install dbt
run: pip install dbt-postgres==${{ needs.set-variables.outputs.release_id }}
.github/workflows/release.yml (21 changed lines)
@@ -247,3 +247,24 @@ jobs:

secrets:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}

testing-slack-notification:
# sends notifications to #slackbot-test
name: Testing - Slack Notification
if: ${{ failure() && inputs.test_run && !inputs.nightly_release }}

needs:
[
bump-version-generate-changelog,
build-test-package,
github-release,
pypi-release,
docker-release,
]

uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
with:
status: "failure"

secrets:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_TESTING_WEBHOOK_URL }}
.github/workflows/schema-check.yml (4 changed lines)
@@ -30,14 +30,14 @@ env:

jobs:
checking-schemas:
name: "Checking schemas"
name: "Post-merge schema changes required"
runs-on: ubuntu-latest

steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.8
python-version: 3.9

- name: Checkout dbt repo
uses: actions/checkout@v4

@@ -76,7 +76,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
python-version: "3.9"

- name: Install python dependencies
run: |
.github/workflows/test-repeater.yml (5 changed lines)
@@ -27,7 +27,6 @@ on:
description: 'Version of Python to Test Against'
type: choice
options:
- '3.8'
- '3.9'
- '3.10'
- '3.11'

@@ -36,7 +35,7 @@ on:
type: choice
options:
- 'ubuntu-latest'
- 'macos-12'
- 'macos-14'
- 'windows-latest'
num_runs_per_batch:
description: 'Max number of times to run the test per batch. We always run 10 batches.'

@@ -101,7 +100,7 @@ jobs:

# mac and windows don't use make due to limitations with docker with those runners in GitHub
- name: "Set up postgres (macos)"
if: inputs.os == 'macos-12'
if: inputs.os == 'macos-14'
uses: ./.github/actions/setup-postgres-macos

- name: "Set up postgres (windows)"
.gitignore (6 changed lines)
@@ -57,6 +57,9 @@ test.env
makefile.test.env
*.pytest_cache/

# Unit test artifacts
index.html

# Translations
*.mo

@@ -105,3 +108,6 @@ venv/

# poetry
poetry.lock

# asdf
.tool-versions
@@ -1,4 +1,4 @@
[settings]
profile=black
extend_skip_glob=.github/*,third-party-stubs/*,scripts/*
known_first_party=dbt,dbt_adapters,dbt_common,dbt_extractor,dbt_semantic_interface
known_first_party=dbt,dbt_adapters,dbt_common,dbt_extractor,dbt_semantic_interfaces
@@ -3,7 +3,7 @@

exclude: ^(core/dbt/docs/build/|core/dbt/common/events/types_pb2.py|core/dbt/events/core_types_pb2.py|core/dbt/adapters/events/adapter_types_pb2.py)

# Force all unspecified python hooks to run python 3.8
# Force all unspecified python hooks to run python 3.9
default_language_version:
python: python3

@@ -15,16 +15,19 @@ repos:
args: [--unsafe]
- id: check-json
- id: end-of-file-fixer
exclude: schemas/dbt/manifest/
- id: trailing-whitespace
exclude_types:
- "markdown"
- id: check-case-conflict
- repo: https://github.com/pycqa/isort
rev: 5.12.0
# rev must match what's in dev-requirements.txt
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 22.3.0
# rev must match what's in dev-requirements.txt
rev: 24.3.0
hooks:
- id: black
- id: black

@@ -34,6 +37,7 @@ repos:
- "--check"
- "--diff"
- repo: https://github.com/pycqa/flake8
# rev must match what's in dev-requirements.txt
rev: 4.0.1
hooks:
- id: flake8

@@ -41,6 +45,7 @@ repos:
alias: flake8-check
stages: [manual]
- repo: https://github.com/pre-commit/mirrors-mypy
# rev must match what's in dev-requirements.txt
rev: v1.4.1
hooks:
- id: mypy
@@ -10,6 +10,7 @@
For information on prior major and minor releases, see their changelogs:

* [1.9](https://github.com/dbt-labs/dbt-core/blob/1.9.latest/CHANGELOG.md)
* [1.8](https://github.com/dbt-labs/dbt-core/blob/1.8.latest/CHANGELOG.md)
* [1.7](https://github.com/dbt-labs/dbt-core/blob/1.7.latest/CHANGELOG.md)
* [1.6](https://github.com/dbt-labs/dbt-core/blob/1.6.latest/CHANGELOG.md)
@@ -170,9 +170,9 @@ Finally, you can also run a specific test or group of tests using [`pytest`](htt

```sh
# run all unit tests in a file
python3 -m pytest tests/unit/test_base_column.py
python3 -m pytest tests/unit/test_invocation_id.py
# run a specific unit test
python3 -m pytest tests/unit/test_base_column.py::TestNumericType::test__numeric_type
python3 -m pytest tests/unit/test_invocation_id.py::TestInvocationId::test_invocation_id
# run specific Postgres functional tests
python3 -m pytest tests/functional/sources
```
@@ -33,9 +33,6 @@ RUN apt-get update \
python-is-python3 \
python-dev-is-python3 \
python3-pip \
python3.8 \
python3.8-dev \
python3.8-venv \
python3.9 \
python3.9-dev \
python3.9-venv \
Makefile (4 changed lines)
@@ -144,3 +144,7 @@ help: ## Show this help message.
@echo
@echo 'options:'
@echo 'use USE_DOCKER=true to run target in a docker container'

.PHONY: json_schema
json_schema: ## Update generated JSON schema using code changes.
scripts/collect-artifact-schema.py --path schemas
codecov.yml (26 changed lines)
@@ -1,6 +1,7 @@
ignore:
- ".github"
- ".changes"

coverage:
status:
project:

@@ -11,3 +12,28 @@ coverage:
default:
target: auto
threshold: 80%

comment:
layout: "header, diff, flags, components" # show component info in the PR comment

component_management:
default_rules: # default rules that will be inherited by all components
statuses:
- type: project # in this case every component that doens't have a status defined will have a project type one
target: auto
threshold: 0.1%
- type: patch
target: 80%
individual_components:
- component_id: unittests
name: "Unit Tests"
flag_regexes:
- "unit"
statuses:
- type: patch
target: 80%
threshold: 5%
- component_id: integrationtests
name: "Integration Tests"
flag_regexes:
- "integration"
@@ -29,6 +29,10 @@ All existing resources are defined under `dbt/artifacts/resources/v1`.

## Making changes to dbt/artifacts

### All changes

All changes to any fields will require a manual update to [dbt-jsonschema](https://github.com/dbt-labs/dbt-jsonschema) to ensure live checking continues to work.

### Non-breaking changes

Freely make incremental, non-breaking changes in-place to the latest major version of any artifact (minor or patch bumps). The only changes that are fully forward and backward compatible are:

@@ -42,9 +46,9 @@ These types of minor, non-breaking changes are tested by [tests/unit/artifacts/t

#### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
Note this must be done AFTER the core pull request is merged, otherwise we may end up with unresolvable conflicts and schemas that are invalid prior to base pull request merge. You may create the schemas.getdbt.com pull request prior to merging the base pull request, but do not merge until afterward.
1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
3. Observe the `Artifact Schema Check` CI check pass on the `dbt-core` PR that updates the artifact schemas, and merge the `dbt-core` PR!

Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, `jsonschema` validation should still continue to work once the schemas are updated because they are forward-compatible and can therefore be used to validate previous minor versions of the schema.
@@ -38,6 +38,7 @@ from dbt.artifacts.resources.v1.macro import Macro, MacroArgument, MacroDependsO
from dbt.artifacts.resources.v1.metric import (
ConstantPropertyInput,
ConversionTypeParams,
CumulativeTypeParams,
Metric,
MetricConfig,
MetricInput,

@@ -45,7 +46,7 @@ from dbt.artifacts.resources.v1.metric import (
MetricTimeWindow,
MetricTypeParams,
)
from dbt.artifacts.resources.v1.model import Model, ModelConfig
from dbt.artifacts.resources.v1.model import Model, ModelConfig, TimeSpine
from dbt.artifacts.resources.v1.owner import Owner
from dbt.artifacts.resources.v1.saved_query import (
Export,
@@ -68,3 +68,10 @@

def plural(self) -> str:
return str(self) + "s"


class BatchSize(StrEnum):
hour = "hour"
day = "day"
month = "month"
year = "year"
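The new `BatchSize` enum is small enough to illustrate directly. A minimal sketch, assuming the enum lands in `dbt.artifacts.resources.types` alongside the other resource enums imported elsewhere in this diff:

```python
# Illustrative only: the import path is inferred from surrounding hunks, not stated in this one.
from dbt.artifacts.resources.types import BatchSize

print([size.value for size in BatchSize])  # ['hour', 'day', 'month', 'year']
print(BatchSize.day.value)                 # 'day'
```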
@@ -10,6 +10,7 @@ from dbt_common.contracts.config.properties import AdditionalPropertiesMixin
from dbt_common.contracts.constraints import ColumnLevelConstraint
from dbt_common.contracts.util import Mergeable
from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, dbtClassMixin
from dbt_semantic_interfaces.type_enums import TimeGranularity

NodeVersion = Union[str, float]

@@ -66,6 +67,7 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
quote: Optional[bool] = None
tags: List[str] = field(default_factory=list)
_extra: Dict[str, Any] = field(default_factory=dict)
granularity: Optional[TimeGranularity] = None


@dataclass
@@ -192,6 +194,7 @@ class ParsedResource(ParsedResourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
raw_code: str = ""

@@ -199,6 +202,8 @@ class ParsedResource(ParsedResourceMandatory):
dct = super().__post_serialize__(dct, context)
if context and context.get("artifact") and "config_call_dict" in dct:
del dct["config_call_dict"]
if context and context.get("artifact") and "unrendered_config_call_dict" in dct:
del dct["unrendered_config_call_dict"]
return dct
@@ -80,6 +80,9 @@ class NodeConfig(NodeAndTestConfig):
# 'mergebehavior' dictionary
materialized: str = "view"
incremental_strategy: Optional[str] = None
batch_size: Any = None
lookback: Any = 1
begin: Any = None
persist_docs: Dict[str, Any] = field(default_factory=dict)
post_hook: List[Hook] = field(
default_factory=list,

@@ -122,6 +125,8 @@ class NodeConfig(NodeAndTestConfig):
default_factory=ContractConfig,
metadata=MergeBehavior.Update.meta(),
)
event_time: Any = None
concurrent_batches: Any = None

def __post_init__(self):
# we validate that node_color has a suitable value to prevent dbt-docs from crashing
@@ -2,13 +2,6 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional

from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
MetricType,
TimeGranularity,
)

from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs

@@ -18,6 +11,13 @@ from dbt.artifacts.resources.v1.semantic_layer_components import (
)
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
MetricType,
PeriodAggregation,
TimeGranularity,
)

"""
The following classes are dataclasses which are used to construct the Metric

@@ -80,6 +80,13 @@ class ConversionTypeParams(dbtClassMixin):
constant_properties: Optional[List[ConstantPropertyInput]] = None


@dataclass
class CumulativeTypeParams(dbtClassMixin):
window: Optional[MetricTimeWindow] = None
grain_to_date: Optional[TimeGranularity] = None
period_agg: PeriodAggregation = PeriodAggregation.FIRST


@dataclass
class MetricTypeParams(dbtClassMixin):
measure: Optional[MetricInputMeasure] = None

@@ -91,6 +98,7 @@ class MetricTypeParams(dbtClassMixin):
grain_to_date: Optional[TimeGranularity] = None
metrics: Optional[List[MetricInput]] = None
conversion_type_params: Optional[ConversionTypeParams] = None
cumulative_type_params: Optional[CumulativeTypeParams] = None


@dataclass
@@ -113,6 +121,7 @@ class Metric(GraphResource):
type_params: MetricTypeParams
filter: Optional[WhereFilterIntersection] = None
metadata: Optional[SourceFileMetadata] = None
time_granularity: Optional[TimeGranularity] = None
resource_type: Literal[NodeType.Metric]
meta: Dict[str, Any] = field(default_factory=dict, metadata=MergeBehavior.Update.meta())
tags: List[str] = field(default_factory=list)
@@ -11,6 +11,7 @@ from dbt.artifacts.resources.v1.components import (
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.contracts.config.base import MergeBehavior
from dbt_common.contracts.constraints import ModelLevelConstraint
from dbt_common.dataclass_schema import dbtClassMixin


@dataclass
@@ -21,6 +22,18 @@ class ModelConfig(NodeConfig):
)


@dataclass
class CustomGranularity(dbtClassMixin):
name: str
column_name: Optional[str] = None


@dataclass
class TimeSpine(dbtClassMixin):
standard_granularity_column: str
custom_granularities: List[CustomGranularity] = field(default_factory=list)


@dataclass
class Model(CompiledResource):
resource_type: Literal[NodeType.Model]

@@ -32,6 +45,7 @@ class Model(CompiledResource):
deprecation_date: Optional[datetime] = None
defer_relation: Optional[DeferRelation] = None
primary_key: List[str] = field(default_factory=list)
time_spine: Optional[TimeSpine] = None

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)
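A minimal usage sketch of the `TimeSpine` and `CustomGranularity` dataclasses added above; the column names are made up for illustration, and the classes are assumed to behave like ordinary dataclasses:

```python
# Hypothetical example values; only the class and field names come from the hunk above.
from dbt.artifacts.resources.v1.model import CustomGranularity, TimeSpine

time_spine = TimeSpine(
    standard_granularity_column="date_day",  # assumed column name
    custom_granularities=[CustomGranularity(name="fiscal_quarter", column_name="fiscal_qtr")],
)
print(time_spine.custom_granularities[0].name)  # fiscal_quarter
```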
@@ -4,10 +4,6 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional

from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)

from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs

@@ -17,6 +13,9 @@ from dbt.artifacts.resources.v1.semantic_layer_components import (
)
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)


@dataclass
@@ -35,6 +34,7 @@ class Export(dbtClassMixin):

name: str
config: ExportConfig
unrendered_config: Dict[str, str] = field(default_factory=dict)


@dataclass
@@ -44,6 +44,8 @@ class QueryParams(dbtClassMixin):
metrics: List[str]
group_by: List[str]
where: Optional[WhereFilterIntersection]
order_by: List[str] = field(default_factory=list)
limit: Optional[int] = None


@dataclass
@@ -1,13 +1,12 @@
from dataclasses import dataclass
from typing import List, Sequence, Tuple

from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import (
WhereFilterParser,
)

from dbt_common.dataclass_schema import dbtClassMixin


@dataclass
class WhereFilter(dbtClassMixin):
@@ -2,6 +2,11 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence

from dbt.artifacts.resources import SourceFileMetadata
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,

@@ -17,12 +22,6 @@ from dbt_semantic_interfaces.type_enums import (
TimeGranularity,
)

from dbt.artifacts.resources import SourceFileMetadata
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin

"""
The classes in this file are dataclasses which are used to construct the Semantic
Model node in dbt-core. Additionally, these classes need to at a minimum support
@@ -1,56 +1,74 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional, Union

from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.dataclass_schema import ValidationError
from dbt_common.dataclass_schema import ValidationError, dbtClassMixin


@dataclass
class SnapshotMetaColumnNames(dbtClassMixin):
dbt_valid_to: Optional[str] = None
dbt_valid_from: Optional[str] = None
dbt_scd_id: Optional[str] = None
dbt_updated_at: Optional[str] = None
dbt_is_deleted: Optional[str] = None


@dataclass
class SnapshotConfig(NodeConfig):
materialized: str = "snapshot"
strategy: Optional[str] = None
unique_key: Optional[str] = None
unique_key: Optional[Union[str, List[str]]] = None
target_schema: Optional[str] = None
target_database: Optional[str] = None
updated_at: Optional[str] = None
# Not using Optional because of serialization issues with a Union of str and List[str]
check_cols: Union[str, List[str], None] = None
snapshot_meta_column_names: SnapshotMetaColumnNames = field(
default_factory=SnapshotMetaColumnNames
)
dbt_valid_to_current: Optional[str] = None

@classmethod
def validate(cls, data):
super().validate(data)
# Note: currently you can't just set these keys in schema.yml because this validation
# will fail when parsing the snapshot node.
if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"):
@property
def snapshot_table_column_names(self):
return {
"dbt_valid_from": self.snapshot_meta_column_names.dbt_valid_from or "dbt_valid_from",
"dbt_valid_to": self.snapshot_meta_column_names.dbt_valid_to or "dbt_valid_to",
"dbt_scd_id": self.snapshot_meta_column_names.dbt_scd_id or "dbt_scd_id",
"dbt_updated_at": self.snapshot_meta_column_names.dbt_updated_at or "dbt_updated_at",
"dbt_is_deleted": self.snapshot_meta_column_names.dbt_is_deleted or "dbt_is_deleted",
}

def final_validate(self):
if not self.strategy or not self.unique_key:
raise ValidationError(
"Snapshots must be configured with a 'strategy', 'unique_key', "
"and 'target_schema'."
"Snapshots must be configured with a 'strategy' and 'unique_key'."
)
if data.get("strategy") == "check":
if not data.get("check_cols"):
if self.strategy == "check":
if not self.check_cols:
raise ValidationError(
"A snapshot configured with the check strategy must "
"specify a check_cols configuration."
)
if isinstance(data["check_cols"], str) and data["check_cols"] != "all":
if isinstance(self.check_cols, str) and self.check_cols != "all":
raise ValidationError(
f"Invalid value for 'check_cols': {data['check_cols']}. "
f"Invalid value for 'check_cols': {self.check_cols}. "
"Expected 'all' or a list of strings."
)
elif data.get("strategy") == "timestamp":
if not data.get("updated_at"):
elif self.strategy == "timestamp":
if not self.updated_at:
raise ValidationError(
"A snapshot configured with the timestamp strategy "
"must specify an updated_at configuration."
)
if data.get("check_cols"):
if self.check_cols:
raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'")
# If the strategy is not 'check' or 'timestamp' it's a custom strategy,
# formerly supported with GenericSnapshotConfig

if data.get("materialized") and data.get("materialized") != "snapshot":
if self.materialized and self.materialized != "snapshot":
raise ValidationError("A snapshot must have a materialized value of 'snapshot'")

# Called by "calculate_node_config_dict" in ContextConfigGenerator
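A hedged sketch of how the reworked snapshot config could be exercised, assuming the module path follows the `v1` layout used by the other resources in this diff and that `SnapshotConfig` accepts keyword arguments like a plain dataclass:

```python
# Sketch only: module path and constructor usage are assumptions, not shown in the hunk above.
from dbt.artifacts.resources.v1.snapshot import SnapshotConfig, SnapshotMetaColumnNames

cfg = SnapshotConfig(
    strategy="timestamp",
    unique_key="id",                      # may now also be a list of column names
    updated_at="updated_at",
    snapshot_meta_column_names=SnapshotMetaColumnNames(dbt_valid_to="valid_to"),
)
cfg.final_validate()  # passes: strategy and unique_key are set, updated_at is present

# Overridden names win; unset names fall back to the dbt_* defaults.
print(cfg.snapshot_table_column_names["dbt_valid_to"])  # valid_to
print(cfg.snapshot_table_column_names["dbt_scd_id"])    # dbt_scd_id
```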
@@ -19,6 +19,7 @@ from dbt_common.exceptions import CompilationError
@dataclass
class SourceConfig(BaseConfig):
enabled: bool = True
event_time: Any = None


@dataclass
@@ -70,3 +71,5 @@ class SourceDefinition(ParsedSourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
created_at: float = field(default_factory=lambda: time.time())
unrendered_database: Optional[str] = None
unrendered_schema: Optional[str] = None
@@ -20,6 +20,7 @@ class UnitTestConfig(BaseConfig):
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)
enabled: bool = True


class UnitTestFormat(StrEnum):
@@ -77,8 +77,11 @@ class BaseArtifactMetadata(dbtClassMixin):
# remote-compile-result
# remote-execution-result
# remote-run-result
S = TypeVar("S", bound="VersionedSchema")


def schema_version(name: str, version: int):
def inner(cls: Type[VersionedSchema]):
def inner(cls: Type[S]):
cls.dbt_schema_version = SchemaVersion(
name=name,
version=version,
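The `TypeVar` change above is easier to see in isolation: binding the decorator's inner function to `S` lets type checkers keep the concrete subclass type instead of widening it to `VersionedSchema`. A standalone sketch with simplified stand-in classes (the real `SchemaVersion` object is replaced by a string here):

```python
from typing import Type, TypeVar

class VersionedSchema:
    dbt_schema_version: str = ""

S = TypeVar("S", bound=VersionedSchema)

def schema_version(name: str, version: int):
    # Returning Type[S] preserves the decorated class's own type for mypy.
    def inner(cls: Type[S]) -> Type[S]:
        cls.dbt_schema_version = f"{name}/v{version}"  # stand-in for SchemaVersion(...)
        return cls
    return inner

@schema_version("run-results", 6)
class RunResultsArtifact(VersionedSchema):
    pass

print(RunResultsArtifact.dbt_schema_version)  # run-results/v6
```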
core/dbt/artifacts/schemas/batch_results.py (new file, 24 lines)
@@ -0,0 +1,24 @@
from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Tuple

from dbt_common.dataclass_schema import dbtClassMixin

BatchType = Tuple[datetime, datetime]


@dataclass
class BatchResults(dbtClassMixin):
successful: List[BatchType] = field(default_factory=list)
failed: List[BatchType] = field(default_factory=list)

def __add__(self, other: BatchResults) -> BatchResults:
return BatchResults(
successful=self.successful + other.successful,
failed=self.failed + other.failed,
)

def __len__(self):
return len(self.successful) + len(self.failed)
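A small usage sketch of the new `BatchResults` container defined above; it relies only on the fields and dunder methods shown in the file, and the timestamps are made-up example values:

```python
from datetime import datetime

from dbt.artifacts.schemas.batch_results import BatchResults

first = BatchResults(successful=[(datetime(2024, 1, 1), datetime(2024, 1, 2))])
second = BatchResults(failed=[(datetime(2024, 1, 2), datetime(2024, 1, 3))])

combined = first + second  # __add__ concatenates the successful and failed lists
print(len(combined))       # 2: one successful batch plus one failed batch
print(combined.failed[0])  # (datetime(2024, 1, 2), datetime(2024, 1, 3))
```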
@@ -1,2 +1,11 @@
# alias to latest
from dbt.artifacts.schemas.catalog.v1.catalog import * # noqa
from dbt_common.contracts.metadata import (
CatalogKey,
CatalogTable,
ColumnMap,
ColumnMetadata,
StatsDict,
StatsItem,
TableMetadata,
)
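With the re-exports above, callers keep importing the catalog primitives from the same dbt-core package path even though they now live in dbt-common (the original definitions are removed in the hunk that follows). A sketch, assuming dbt-common keeps the same field shapes as the removed classes:

```python
# Assumption: CatalogKey is still a (database, schema, name) named tuple and ColumnMetadata
# still takes (type, index, name, comment), matching the definitions removed below.
from dbt.artifacts.schemas.catalog import CatalogKey, ColumnMetadata

key = CatalogKey(database="analytics", schema="public", name="orders")  # example values
col = ColumnMetadata(type="integer", index=1, name="order_id")
print(key.name, col.type)  # orders integer
```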
@@ -1,71 +1,18 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, NamedTuple, Optional, Union
from typing import Any, Dict, List, Optional, Union

from dbt.artifacts.schemas.base import (
ArtifactMixin,
BaseArtifactMetadata,
schema_version,
)
from dbt_common.contracts.metadata import CatalogTable
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.utils.formatting import lowercase

Primitive = Union[bool, str, float, None]
PrimitiveDict = Dict[str, Primitive]

CatalogKey = NamedTuple(
"CatalogKey", [("database", Optional[str]), ("schema", str), ("name", str)]
)


@dataclass
class StatsItem(dbtClassMixin):
id: str
label: str
value: Primitive
include: bool
description: Optional[str] = None


StatsDict = Dict[str, StatsItem]


@dataclass
class ColumnMetadata(dbtClassMixin):
type: str
index: int
name: str
comment: Optional[str] = None


ColumnMap = Dict[str, ColumnMetadata]


@dataclass
class TableMetadata(dbtClassMixin):
type: str
schema: str
name: str
database: Optional[str] = None
comment: Optional[str] = None
owner: Optional[str] = None


@dataclass
class CatalogTable(dbtClassMixin):
metadata: TableMetadata
columns: ColumnMap
stats: StatsDict
# the same table with two unique IDs will just be listed two times
unique_id: Optional[str] = None

def key(self) -> CatalogKey:
return CatalogKey(
lowercase(self.metadata.database),
self.metadata.schema.lower(),
self.metadata.name.lower(),
)


@dataclass
class CatalogMetadata(BaseArtifactMetadata):
@@ -10,6 +10,12 @@ from dbt_common.utils import cast_to_int, cast_to_str

@dataclass
class TimingInfo(dbtClassMixin):
"""
Represents a step in the execution of a node.
`name` should be one of: compile, execute, or other
Do not call directly, use `collect_timing_info` instead.
"""

name: str
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None

@@ -21,7 +27,7 @@ class TimingInfo(dbtClassMixin):
self.completed_at = datetime.utcnow()

def to_msg_dict(self):
msg_dict = {"name": self.name}
msg_dict = {"name": str(self.name)}
if self.started_at:
msg_dict["started_at"] = datetime_to_json_string(self.started_at)
if self.completed_at:

@@ -55,6 +61,7 @@ class NodeStatus(StrEnum):
Fail = "fail"
Warn = "warn"
Skipped = "skipped"
PartialSuccess = "partial success"
Pass = "pass"
RuntimeErr = "runtime error"

@@ -63,6 +70,7 @@ class RunStatus(StrEnum):
Success = NodeStatus.Success
Error = NodeStatus.Error
Skipped = NodeStatus.Skipped
PartialSuccess = NodeStatus.PartialSuccess


class TestStatus(StrEnum):
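A quick sketch of the documented `TimingInfo` behavior, using only the fields and `to_msg_dict` logic visible in the hunks above (the new docstring itself says not to construct it directly in real code):

```python
from dbt.artifacts.schemas.results import TimingInfo

timing = TimingInfo(name="compile")
print(timing.to_msg_dict())  # {'name': 'compile'} until started_at/completed_at are recorded
```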
@@ -1,3 +1,5 @@
from __future__ import annotations

import copy
import threading
from dataclasses import dataclass, field

@@ -17,6 +19,7 @@ from dbt.artifacts.schemas.base import (
get_artifact_schema_version,
schema_version,
)
from dbt.artifacts.schemas.batch_results import BatchResults
from dbt.artifacts.schemas.results import (
BaseResult,
ExecutionResult,

@@ -34,6 +37,7 @@ class RunResult(NodeResult):
agate_table: Optional["agate.Table"] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
batch_results: Optional[BatchResults] = None

@property
def skipped(self):

@@ -51,6 +55,7 @@ class RunResult(NodeResult):
node=node,
adapter_response={},
failures=None,
batch_results=None,
)


@@ -67,6 +72,7 @@ class RunResultOutput(BaseResult):
compiled: Optional[bool]
compiled_code: Optional[str]
relation_name: Optional[str]
batch_results: Optional[BatchResults] = None


def process_run_result(result: RunResult) -> RunResultOutput:
@@ -82,6 +88,7 @@ def process_run_result(result: RunResult) -> RunResultOutput:
message=result.message,
adapter_response=result.adapter_response,
failures=result.failures,
batch_results=result.batch_results,
compiled=result.node.compiled if compiled else None, # type:ignore
compiled_code=result.node.compiled_code if compiled else None, # type:ignore
relation_name=result.node.relation_name if compiled else None, # type:ignore

@@ -158,7 +165,8 @@ class RunResultsArtifact(ExecutionResult, ArtifactMixin):
@classmethod
def upgrade_schema_version(cls, data):
"""This overrides the "upgrade_schema_version" call in VersionedSchema (via
ArtifactMixin) to modify the dictionary passed in from earlier versions of the run_results."""
ArtifactMixin) to modify the dictionary passed in from earlier versions of the run_results.
"""
run_results_schema_version = get_artifact_schema_version(data)
# If less than the current version (v5), preprocess contents to match latest schema version
if run_results_schema_version <= 5:
@@ -1,7 +1,10 @@
from typing import IO, Optional
from typing import IO, List, Optional, Union

from click.exceptions import ClickException

from dbt.artifacts.schemas.catalog import CatalogArtifact
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.results import RunExecutionResult
from dbt.utils import ExitCodes


@@ -23,7 +26,7 @@ class CliException(ClickException):

# the typing of _file is to satisfy the signature of ClickException.show
# overriding this method prevents click from printing any exceptions to stdout
def show(self, _file: Optional[IO] = None) -> None:
def show(self, _file: Optional[IO] = None) -> None: # type: ignore[type-arg]
pass


@@ -31,7 +34,17 @@ class ResultExit(CliException):
"""This class wraps any exception that contains results while invoking dbt, or the
results of an invocation that did not succeed but did not throw any exceptions."""

def __init__(self, result) -> None:
def __init__(
self,
result: Union[
bool, # debug
CatalogArtifact, # docs generate
List[str], # list/ls
Manifest, # parse
None, # clean, deps, init, source
RunExecutionResult, # build, compile, run, seed, snapshot, test, run-operation
] = None,
) -> None:
super().__init__(ExitCodes.ModelError)
self.result = result
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from importlib import import_module
|
||||
from pathlib import Path
|
||||
from pprint import pformat as pf
|
||||
@@ -15,7 +16,7 @@ from dbt.cli.resolvers import default_log_path, default_project_dir
|
||||
from dbt.cli.types import Command as CliCommand
|
||||
from dbt.config.project import read_project_flags
|
||||
from dbt.contracts.project import ProjectFlags
|
||||
from dbt.deprecations import renamed_env_var
|
||||
from dbt.deprecations import fire_buffered_deprecations, renamed_env_var
|
||||
from dbt.events import ALL_EVENT_NAMES
|
||||
from dbt_common import ui
|
||||
from dbt_common.clients import jinja
|
||||
@@ -37,6 +38,7 @@ FLAGS_DEFAULTS = {
|
||||
"STRICT_MODE": False,
|
||||
"STORE_FAILURES": False,
|
||||
"INTROSPECT": True,
|
||||
"STATE_MODIFIED_COMPARE_VARS": False,
|
||||
}
|
||||
|
||||
DEPRECATED_PARAMS = {
|
||||
@@ -57,6 +59,7 @@ def convert_config(config_name, config_value):
|
||||
ret = WarnErrorOptions(
|
||||
include=config_value.get("include", []),
|
||||
exclude=config_value.get("exclude", []),
|
||||
silence=config_value.get("silence", []),
|
||||
valid_error_names=ALL_EVENT_NAMES,
|
||||
)
|
||||
return ret
|
||||
@@ -91,6 +94,8 @@ class Flags:
|
||||
# Set the default flags.
|
||||
for key, value in FLAGS_DEFAULTS.items():
|
||||
object.__setattr__(self, key, value)
|
||||
# Use to handle duplicate params in _assign_params
|
||||
flags_defaults_list = list(FLAGS_DEFAULTS.keys())
|
||||
|
||||
if ctx is None:
|
||||
ctx = get_current_context()
|
||||
@@ -172,13 +177,29 @@ class Flags:
|
||||
old_name=dep_param.envvar,
|
||||
new_name=new_param.envvar,
|
||||
)
|
||||
# end deprecated_params
|
||||
|
||||
# Set the flag value.
|
||||
is_duplicate = hasattr(self, param_name.upper())
|
||||
is_duplicate = (
|
||||
hasattr(self, param_name.upper())
|
||||
and param_name.upper() not in flags_defaults_list
|
||||
)
|
||||
# First time through, set as though FLAGS_DEFAULTS hasn't been set, so not a duplicate.
|
||||
# Subsequent pass (to process "parent" params) should be treated as duplicates.
|
||||
if param_name.upper() in flags_defaults_list:
|
||||
flags_defaults_list.remove(param_name.upper())
|
||||
# Note: the following determines whether parameter came from click default,
|
||||
# not from FLAGS_DEFAULTS in __init__.
|
||||
is_default = ctx.get_parameter_source(param_name) == ParameterSource.DEFAULT
|
||||
is_envvar = ctx.get_parameter_source(param_name) == ParameterSource.ENVIRONMENT
|
||||
|
||||
flag_name = (new_name or param_name).upper()
|
||||
|
||||
if (is_duplicate and not is_default) or not is_duplicate:
|
||||
# envvar flags are assigned in either parent or child context if there
|
||||
# isn't an overriding cli command flag.
|
||||
# If the flag has been encountered as a child cli flag, we don't
|
||||
# want to overwrite with parent envvar, since the commandline flag takes precedence.
|
||||
if (is_duplicate and not (is_default or is_envvar)) or not is_duplicate:
|
||||
object.__setattr__(self, flag_name, param_value)
|
||||
|
||||
# Track default assigned params.
|
||||
@@ -289,6 +310,13 @@ class Flags:
|
||||
params_assigned_from_default, ["WARN_ERROR", "WARN_ERROR_OPTIONS"]
|
||||
)
|
||||
|
||||
# Handle arguments mutually exclusive with INLINE
|
||||
self._assert_mutually_exclusive(params_assigned_from_default, ["SELECT", "INLINE"])
|
||||
self._assert_mutually_exclusive(params_assigned_from_default, ["SELECTOR", "INLINE"])
|
||||
|
||||
# Check event_time configs for validity
|
||||
self._validate_event_time_configs()
|
||||
|
||||
# Support lower cased access for legacy code.
|
||||
params = set(
|
||||
x for x in dir(self) if not callable(getattr(self, x)) and not x.startswith("__")
|
||||
@@ -315,7 +343,9 @@ class Flags:
|
||||
"""
|
||||
set_flag = None
|
||||
for flag in group:
|
||||
flag_set_by_user = flag.lower() not in params_assigned_from_default
|
||||
flag_set_by_user = (
|
||||
hasattr(self, flag) and flag.lower() not in params_assigned_from_default
|
||||
)
|
||||
if flag_set_by_user and set_flag:
|
||||
raise DbtUsageException(
|
||||
f"{flag.lower()}: not allowed with argument {set_flag.lower()}"
|
||||
@@ -323,6 +353,36 @@ class Flags:
|
||||
elif flag_set_by_user:
|
||||
set_flag = flag
|
||||
|
||||
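For orientation, the `_assert_mutually_exclusive` check above boils down to the following standalone sketch; a flag counts as "set by the user" only if it exists on the object and was not assigned from a default. The helper and exception names here are illustrative placeholders, not dbt internals.

```python
from typing import Iterable, Mapping, Set


class UsageError(Exception):
    pass


def assert_mutually_exclusive(flags: Mapping[str, object], defaults: Set[str], group: Iterable[str]) -> None:
    set_flag = None
    for flag in group:
        # "Set by user" = present and not merely left at its default value.
        flag_set_by_user = flag in flags and flag.lower() not in defaults
        if flag_set_by_user and set_flag:
            raise UsageError(f"{flag.lower()}: not allowed with argument {set_flag.lower()}")
        elif flag_set_by_user:
            set_flag = flag


try:
    assert_mutually_exclusive({"SELECT": "my_model", "INLINE": "select 1"}, set(), ["SELECT", "INLINE"])
except UsageError as exc:
    print(exc)  # inline: not allowed with argument select
```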
def _validate_event_time_configs(self) -> None:
|
||||
event_time_start: datetime = (
|
||||
getattr(self, "EVENT_TIME_START") if hasattr(self, "EVENT_TIME_START") else None
|
||||
)
|
||||
event_time_end: datetime = (
|
||||
getattr(self, "EVENT_TIME_END") if hasattr(self, "EVENT_TIME_END") else None
|
||||
)
|
||||
|
||||
# only do validations if at least one of `event_time_start` or `event_time_end` is specified
|
||||
if event_time_start is not None or event_time_end is not None:
|
||||
|
||||
# These `ifs`, combined with the parent `if` make it so that `event_time_start` and
|
||||
# `event_time_end` are mutually required
|
||||
if event_time_start is None:
|
||||
raise DbtUsageException(
|
||||
"The flag `--event-time-end` was specified, but `--event-time-start` was not. "
|
||||
"When specifying `--event-time-end`, `--event-time-start` must also be present."
|
||||
)
|
||||
if event_time_end is None:
|
||||
raise DbtUsageException(
|
||||
"The flag `--event-time-start` was specified, but `--event-time-end` was not. "
|
||||
"When specifying `--event-time-start`, `--event-time-end` must also be present."
|
||||
)
|
||||
|
||||
# This `if` is just a sanity check that `event_time_start` is before `event_time_end`
|
||||
if event_time_start >= event_time_end:
|
||||
raise DbtUsageException(
|
||||
"Value for `--event-time-start` must be less than `--event-time-end`"
|
||||
)
|
||||
|
||||
def fire_deprecations(self):
|
||||
"""Fires events for deprecated env_var usage."""
|
||||
[dep_fn() for dep_fn in self.deprecated_env_var_warnings]
|
||||
@@ -330,6 +390,8 @@ class Flags:
|
||||
# not get pickled when written to disk as json.
|
||||
object.__delattr__(self, "deprecated_env_var_warnings")
|
||||
|
||||
fire_buffered_deprecations()
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, command: CliCommand, args_dict: Dict[str, Any]) -> "Flags":
|
||||
command_arg_list = command_params(command, args_dict)
|
||||
|
||||
@@ -8,12 +8,15 @@ from click.exceptions import BadOptionUsage
|
||||
from click.exceptions import Exit as ClickExit
|
||||
from click.exceptions import NoSuchOption, UsageError
|
||||
|
||||
from dbt.adapters.factory import register_adapter
|
||||
from dbt.artifacts.schemas.catalog import CatalogArtifact
|
||||
from dbt.artifacts.schemas.run import RunExecutionResult
|
||||
from dbt.cli import params as p
|
||||
from dbt.cli import requires
|
||||
from dbt.cli.exceptions import DbtInternalException, DbtUsageException
|
||||
from dbt.cli.requires import setup_manifest
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.mp_context import get_mp_context
|
||||
from dbt_common.events.base_types import EventMsg
|
||||
@@ -165,6 +168,8 @@ def cli(ctx, **kwargs):
|
||||
@click.pass_context
|
||||
@global_flags
|
||||
@p.empty
|
||||
@p.event_time_start
|
||||
@p.event_time_end
|
||||
@p.exclude
|
||||
@p.export_saved_queries
|
||||
@p.full_refresh
|
||||
@@ -218,10 +223,9 @@ def clean(ctx, **kwargs):
|
||||
"""Delete all folders in the clean-targets list (usually the dbt_packages and target directories.)"""
|
||||
from dbt.task.clean import CleanTask
|
||||
|
||||
task = CleanTask(ctx.obj["flags"], ctx.obj["project"])
|
||||
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
with CleanTask(ctx.obj["flags"], ctx.obj["project"]) as task:
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
return results, success
|
||||
|
||||
|
||||
@@ -274,6 +278,7 @@ def docs_generate(ctx, **kwargs):
|
||||
@click.pass_context
|
||||
@global_flags
|
||||
@p.browser
|
||||
@p.host
|
||||
@p.port
|
||||
@p.profiles_dir
|
||||
@p.project_dir
|
||||
@@ -352,6 +357,7 @@ def compile(ctx, **kwargs):
|
||||
@p.select
|
||||
@p.selector
|
||||
@p.inline
|
||||
@p.inline_direct
|
||||
@p.target_path
|
||||
@p.threads
|
||||
@p.vars
|
||||
@@ -360,17 +366,26 @@ def compile(ctx, **kwargs):
|
||||
@requires.profile
|
||||
@requires.project
|
||||
@requires.runtime_config
|
||||
@requires.manifest
|
||||
def show(ctx, **kwargs):
|
||||
"""Generates executable SQL for a named resource or inline query, runs that SQL, and returns a preview of the
|
||||
results. Does not materialize anything to the warehouse."""
|
||||
from dbt.task.show import ShowTask
|
||||
from dbt.task.show import ShowTask, ShowTaskDirect
|
||||
|
||||
task = ShowTask(
|
||||
ctx.obj["flags"],
|
||||
ctx.obj["runtime_config"],
|
||||
ctx.obj["manifest"],
|
||||
)
|
||||
if ctx.obj["flags"].inline_direct:
|
||||
# Issue the inline query directly, with no templating. Does not require
|
||||
# loading the manifest.
|
||||
register_adapter(ctx.obj["runtime_config"], get_mp_context())
|
||||
task = ShowTaskDirect(
|
||||
ctx.obj["flags"],
|
||||
ctx.obj["runtime_config"],
|
||||
)
|
||||
else:
|
||||
setup_manifest(ctx)
|
||||
task = ShowTask(
|
||||
ctx.obj["flags"],
|
||||
ctx.obj["runtime_config"],
|
||||
ctx.obj["manifest"],
|
||||
)
|
||||
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
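Rough sketch of the branch above: `--inline-direct` skips manifest loading and templating entirely, while the normal path sets up the manifest before building the task. The class and function names below are simplified stand-ins, not the actual dbt task classes.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ShowFlags:
    inline_direct: Optional[str] = None


def run_show(flags: ShowFlags) -> str:
    if flags.inline_direct:
        # Direct path: hand the raw SQL to the adapter, no jinja, no manifest.
        return f"direct: {flags.inline_direct}"
    # Normal path: parse the project into a manifest, then compile and preview.
    return "templated preview via manifest"


print(run_show(ShowFlags(inline_direct="select 1 as id")))  # direct: select 1 as id
print(run_show(ShowFlags()))                                # templated preview via manifest
```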
@@ -436,9 +451,9 @@ def deps(ctx, **kwargs):
|
||||
message=f"Version is required in --add-package when a package when source is {flags.SOURCE}",
|
||||
option_name="--add-package",
|
||||
)
|
||||
task = DepsTask(flags, ctx.obj["project"])
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
with DepsTask(flags, ctx.obj["project"]) as task:
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
return results, success
|
||||
|
||||
|
||||
@@ -458,10 +473,9 @@ def init(ctx, **kwargs):
|
||||
"""Initialize a new dbt project."""
|
||||
from dbt.task.init import InitTask
|
||||
|
||||
task = InitTask(ctx.obj["flags"])
|
||||
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
with InitTask(ctx.obj["flags"]) as task:
|
||||
results = task.run()
|
||||
success = task.interpret_results(results)
|
||||
return results, success
|
||||
|
||||
|
||||
@@ -538,6 +552,8 @@ def parse(ctx, **kwargs):
|
||||
@p.profiles_dir
|
||||
@p.project_dir
|
||||
@p.empty
|
||||
@p.event_time_start
|
||||
@p.event_time_end
|
||||
@p.select
|
||||
@p.selector
|
||||
@p.target_path
|
||||
@@ -700,6 +716,7 @@ def seed(ctx, **kwargs):
|
||||
@cli.command("snapshot")
|
||||
@click.pass_context
|
||||
@global_flags
|
||||
@p.empty
|
||||
@p.exclude
|
||||
@p.profiles_dir
|
||||
@p.project_dir
|
||||
@@ -782,6 +799,8 @@ cli.commands["source"].add_command(snapshot_freshness, "snapshot-freshness") #
|
||||
@click.pass_context
|
||||
@global_flags
|
||||
@p.exclude
|
||||
@p.resource_type
|
||||
@p.exclude_resource_type
|
||||
@p.profiles_dir
|
||||
@p.project_dir
|
||||
@p.select
|
|
||||
from click import Choice, ParamType
|
||||
|
||||
from dbt.config.utils import exclusive_primary_alt_value_setting, parse_cli_yaml_string
|
||||
from dbt.config.utils import normalize_warn_error_options, parse_cli_yaml_string
|
||||
from dbt.events import ALL_EVENT_NAMES
|
||||
from dbt.exceptions import OptionNotYamlDictError, ValidationError
|
||||
from dbt_common.exceptions import DbtValidationError
|
||||
@@ -51,12 +51,7 @@ class WarnErrorOptionsType(YAML):
|
||||
def convert(self, value, param, ctx):
|
||||
# this function is being used by param in click
|
||||
include_exclude = super().convert(value, param, ctx)
|
||||
exclusive_primary_alt_value_setting(
|
||||
include_exclude, "include", "error", "warn_error_options"
|
||||
)
|
||||
exclusive_primary_alt_value_setting(
|
||||
include_exclude, "exclude", "warn", "warn_error_options"
|
||||
)
|
||||
normalize_warn_error_options(include_exclude)
|
||||
|
||||
return WarnErrorOptions(
|
||||
include=include_exclude.get("include", []),
|
||||
|
||||
@@ -91,6 +91,22 @@ empty = click.option(
|
||||
is_flag=True,
|
||||
)
|
||||
|
||||
event_time_end = click.option(
|
||||
"--event-time-end",
|
||||
envvar="DBT_EVENT_TIME_END",
|
||||
help="If specified, the end datetime dbt uses to filter microbatch model inputs (exclusive).",
|
||||
type=click.DateTime(),
|
||||
default=None,
|
||||
)
|
||||
|
||||
event_time_start = click.option(
|
||||
"--event-time-start",
|
||||
envvar="DBT_EVENT_TIME_START",
|
||||
help="If specified, the start datetime dbt uses to filter microbatch model inputs (inclusive).",
|
||||
type=click.DateTime(),
|
||||
default=None,
|
||||
)
|
||||
|
||||
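An illustrative click command (not part of dbt) wired the same way as the `--event-time-start`/`--event-time-end` options defined above, showing how the `click.DateTime` type and env-var fallbacks behave:

```python
import click


@click.command()
@click.option("--event-time-start", envvar="DBT_EVENT_TIME_START", type=click.DateTime(), default=None)
@click.option("--event-time-end", envvar="DBT_EVENT_TIME_END", type=click.DateTime(), default=None)
def preview(event_time_start, event_time_end):
    # For microbatch models, dbt treats the start as inclusive and the end as exclusive.
    click.echo(f"start={event_time_start!r} end={event_time_end!r}")


if __name__ == "__main__":
    # e.g. preview --event-time-start 2024-01-01 --event-time-end 2024-01-02
    preview()
```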
exclude = click.option(
|
||||
"--exclude",
|
||||
envvar=None,
|
||||
@@ -135,6 +151,14 @@ full_refresh = click.option(
|
||||
is_flag=True,
|
||||
)
|
||||
|
||||
host = click.option(
|
||||
"--host",
|
||||
envvar="DBT_HOST",
|
||||
help="host to serve dbt docs on",
|
||||
type=click.STRING,
|
||||
default="127.0.0.1",
|
||||
)
|
||||
|
||||
indirect_selection = click.option(
|
||||
"--indirect-selection",
|
||||
envvar="DBT_INDIRECT_SELECTION",
|
||||
@@ -463,6 +487,13 @@ inline = click.option(
|
||||
help="Pass SQL inline to dbt compile and show",
|
||||
)
|
||||
|
||||
inline_direct = click.option(
|
||||
"--inline-direct",
|
||||
envvar=None,
|
||||
help="Internal flag to pass SQL inline to dbt show. Do not load the entire project or apply templating.",
|
||||
hidden=True,
|
||||
)
|
||||
|
||||
# `--select` and `--models` are analogous for most commands except `dbt list` for legacy reasons.
|
||||
# Most CLI arguments should use the combined `select` option that aliases `--models` to `--select`.
|
||||
# However, if a command needs to expose these as separate options (like `dbt ls`), use the `models` and `raw_select` options instead.
|
||||
|
||||
@@ -41,7 +41,13 @@ from dbt_common.events.functions import LOG_VERSION, fire_event
|
||||
from dbt_common.events.helpers import get_json_string_utcnow
|
||||
from dbt_common.exceptions import DbtBaseException as DbtException
|
||||
from dbt_common.invocation import reset_invocation_id
|
||||
from dbt_common.record import Recorder, RecorderMode, get_record_mode_from_env
|
||||
from dbt_common.record import (
|
||||
Recorder,
|
||||
RecorderMode,
|
||||
get_record_mode_from_env,
|
||||
get_record_types_from_dict,
|
||||
get_record_types_from_env,
|
||||
)
|
||||
from dbt_common.utils import cast_dict_to_dict_of_strings
|
||||
|
||||
|
||||
@@ -101,13 +107,23 @@ def preflight(func):
|
||||
|
||||
def setup_record_replay():
|
||||
rec_mode = get_record_mode_from_env()
|
||||
rec_types = get_record_types_from_env()
|
||||
|
||||
recorder: Optional[Recorder] = None
|
||||
if rec_mode == RecorderMode.REPLAY:
|
||||
recording_path = os.environ["DBT_REPLAY"]
|
||||
recorder = Recorder(RecorderMode.REPLAY, recording_path)
|
||||
previous_recording_path = os.environ.get("DBT_RECORDER_FILE_PATH")
|
||||
recorder = Recorder(
|
||||
RecorderMode.REPLAY, types=rec_types, previous_recording_path=previous_recording_path
|
||||
)
|
||||
elif rec_mode == RecorderMode.DIFF:
|
||||
previous_recording_path = os.environ.get("DBT_RECORDER_FILE_PATH")
|
||||
# ensure types match the previous recording
|
||||
types = get_record_types_from_dict(previous_recording_path)
|
||||
recorder = Recorder(
|
||||
RecorderMode.DIFF, types=types, previous_recording_path=previous_recording_path
|
||||
)
|
||||
elif rec_mode == RecorderMode.RECORD:
|
||||
recorder = Recorder(RecorderMode.RECORD)
|
||||
recorder = Recorder(RecorderMode.RECORD, types=rec_types)
|
||||
|
||||
get_invocation_context().recorder = recorder
|
||||
|
||||
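A condensed restatement of the mode selection above. `DBT_RECORDER_FILE_PATH` is taken from the snippet; the mode lookup is abstracted into a plain parameter because the exact environment variable behind `get_record_mode_from_env()` is not shown here.

```python
from enum import Enum
from typing import Optional


class Mode(Enum):
    RECORD = "record"
    REPLAY = "replay"
    DIFF = "diff"


def pick_recorder(mode: Optional[Mode], env: dict) -> Optional[dict]:
    if mode is None:
        return None
    if mode in (Mode.REPLAY, Mode.DIFF):
        # Replay and diff both need the previous recording to compare against;
        # diff additionally reuses the record types stored in that file.
        return {"mode": mode, "previous_recording_path": env.get("DBT_RECORDER_FILE_PATH")}
    return {"mode": Mode.RECORD}


print(pick_recorder(Mode.DIFF, {"DBT_RECORDER_FILE_PATH": "recording.json"}))
# {'mode': <Mode.DIFF: 'diff'>, 'previous_recording_path': 'recording.json'}
```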
@@ -116,7 +132,10 @@ def tear_down_record_replay():
|
||||
recorder = get_invocation_context().recorder
|
||||
if recorder is not None:
|
||||
if recorder.mode == RecorderMode.RECORD:
|
||||
recorder.write("recording.json")
|
||||
recorder.write()
|
||||
if recorder.mode == RecorderMode.DIFF:
|
||||
recorder.write()
|
||||
recorder.write_diffs(diff_file_name="recording_diffs.json")
|
||||
elif recorder.mode == RecorderMode.REPLAY:
|
||||
recorder.write_diffs("replay_diffs.json")
|
||||
|
||||
@@ -160,9 +179,11 @@ def postflight(func):
|
||||
process_in_blocks=rusage.ru_inblock,
|
||||
process_out_blocks=rusage.ru_oublock,
|
||||
),
|
||||
EventLevel.INFO
|
||||
if "flags" in ctx.obj and ctx.obj["flags"].SHOW_RESOURCE_REPORT
|
||||
else None,
|
||||
(
|
||||
EventLevel.INFO
|
||||
if "flags" in ctx.obj and ctx.obj["flags"].SHOW_RESOURCE_REPORT
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
fire_event(
|
||||
@@ -303,28 +324,7 @@ def manifest(*args0, write=True, write_perf_info=False):
|
||||
ctx = args[0]
|
||||
assert isinstance(ctx, Context)
|
||||
|
||||
req_strs = ["profile", "project", "runtime_config"]
|
||||
reqs = [ctx.obj.get(dep) for dep in req_strs]
|
||||
|
||||
if None in reqs:
|
||||
raise DbtProjectError("profile, project, and runtime_config required for manifest")
|
||||
|
||||
runtime_config = ctx.obj["runtime_config"]
|
||||
|
||||
# if a manifest has already been set on the context, don't overwrite it
|
||||
if ctx.obj.get("manifest") is None:
|
||||
ctx.obj["manifest"] = parse_manifest(
|
||||
runtime_config, write_perf_info, write, ctx.obj["flags"].write_json
|
||||
)
|
||||
else:
|
||||
register_adapter(runtime_config, get_mp_context())
|
||||
adapter = get_adapter(runtime_config)
|
||||
adapter.set_macro_context_generator(generate_runtime_macro_context)
|
||||
adapter.set_macro_resolver(ctx.obj["manifest"])
|
||||
query_header_context = generate_query_header_context(
|
||||
adapter.config, ctx.obj["manifest"]
|
||||
)
|
||||
adapter.connections.set_query_header(query_header_context)
|
||||
setup_manifest(ctx, write=write, write_perf_info=write_perf_info)
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return update_wrapper(wrapper, func)
|
||||
@@ -334,3 +334,27 @@ def manifest(*args0, write=True, write_perf_info=False):
|
||||
if len(args0) == 0:
|
||||
return outer_wrapper
|
||||
return outer_wrapper(args0[0])
|
||||
|
||||
|
||||
def setup_manifest(ctx: Context, write: bool = True, write_perf_info: bool = False):
|
||||
"""Load the manifest and add it to the context."""
|
||||
req_strs = ["profile", "project", "runtime_config"]
|
||||
reqs = [ctx.obj.get(dep) for dep in req_strs]
|
||||
|
||||
if None in reqs:
|
||||
raise DbtProjectError("profile, project, and runtime_config required for manifest")
|
||||
|
||||
runtime_config = ctx.obj["runtime_config"]
|
||||
|
||||
# if a manifest has already been set on the context, don't overwrite it
|
||||
if ctx.obj.get("manifest") is None:
|
||||
ctx.obj["manifest"] = parse_manifest(
|
||||
runtime_config, write_perf_info, write, ctx.obj["flags"].write_json
|
||||
)
|
||||
else:
|
||||
register_adapter(runtime_config, get_mp_context())
|
||||
adapter = get_adapter(runtime_config)
|
||||
adapter.set_macro_context_generator(generate_runtime_macro_context) # type: ignore[arg-type]
|
||||
adapter.set_macro_resolver(ctx.obj["manifest"])
|
||||
query_header_context = generate_query_header_context(adapter.config, ctx.obj["manifest"]) # type: ignore[attr-defined]
|
||||
adapter.connections.set_query_header(query_header_context)
|
||||
|
||||
@@ -1,29 +1,50 @@
|
||||
from typing import Any, Dict, Optional
|
||||
import typing
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import jinja2
|
||||
|
||||
from dbt.exceptions import MacroNamespaceNotStringError
|
||||
from dbt.artifacts.resources import RefArgs
|
||||
from dbt.exceptions import MacroNamespaceNotStringError, ParsingError
|
||||
from dbt_common.clients.jinja import get_environment
|
||||
from dbt_common.exceptions.macros import MacroNameNotStringError
|
||||
from dbt_common.tests import test_caching_enabled
|
||||
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
|
||||
|
||||
_TESTING_MACRO_CACHE: Optional[Dict[str, Any]] = {}
|
||||
if typing.TYPE_CHECKING:
|
||||
from dbt.context.providers import ParseDatabaseWrapper
|
||||
|
||||
|
||||
def statically_extract_macro_calls(string, ctx, db_wrapper=None):
|
||||
_TESTING_MACRO_CACHE: Dict[str, Any] = {}
|
||||
|
||||
|
||||
def statically_extract_has_name_this(source: str) -> bool:
|
||||
"""Checks whether the raw jinja has any references to `this`"""
|
||||
env = get_environment(None, capture_macros=True)
|
||||
parsed = env.parse(source)
|
||||
names = tuple(parsed.find_all(jinja2.nodes.Name))
|
||||
|
||||
for name in names:
|
||||
if hasattr(name, "name") and name.name == "this":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
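A quick standalone check of the same idea, using a plain `jinja2.Environment` instead of dbt_common's `get_environment` (close enough for illustration):

```python
import jinja2


def has_name_this(source: str) -> bool:
    # Walk the parsed template AST looking for a Name node called "this".
    parsed = jinja2.Environment().parse(source)
    return any(getattr(node, "name", None) == "this" for node in parsed.find_all(jinja2.nodes.Name))


print(has_name_this("select * from {{ this }}"))           # True
print(has_name_this("select * from {{ ref('orders') }}"))  # False
```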
def statically_extract_macro_calls(
|
||||
source: str, ctx: Dict[str, Any], db_wrapper: Optional["ParseDatabaseWrapper"] = None
|
||||
) -> List[str]:
|
||||
# set 'capture_macros' to capture undefined
|
||||
env = get_environment(None, capture_macros=True)
|
||||
|
||||
global _TESTING_MACRO_CACHE
|
||||
if test_caching_enabled() and string in _TESTING_MACRO_CACHE:
|
||||
parsed = _TESTING_MACRO_CACHE.get(string, None)
|
||||
if test_caching_enabled() and source in _TESTING_MACRO_CACHE:
|
||||
parsed = _TESTING_MACRO_CACHE.get(source, None)
|
||||
func_calls = getattr(parsed, "_dbt_cached_calls")
|
||||
else:
|
||||
parsed = env.parse(string)
|
||||
parsed = env.parse(source)
|
||||
func_calls = tuple(parsed.find_all(jinja2.nodes.Call))
|
||||
|
||||
if test_caching_enabled():
|
||||
_TESTING_MACRO_CACHE[string] = parsed
|
||||
_TESTING_MACRO_CACHE[source] = parsed
|
||||
setattr(parsed, "_dbt_cached_calls", func_calls)
|
||||
|
||||
standard_calls = ["source", "ref", "config"]
|
||||
@@ -67,30 +88,9 @@ def statically_extract_macro_calls(string, ctx, db_wrapper=None):
|
||||
return possible_macro_calls
|
||||
|
||||
|
||||
# Call(
|
||||
# node=Getattr(
|
||||
# node=Name(
|
||||
# name='adapter',
|
||||
# ctx='load'
|
||||
# ),
|
||||
# attr='dispatch',
|
||||
# ctx='load'
|
||||
# ),
|
||||
# args=[
|
||||
# Const(value='test_pkg_and_dispatch')
|
||||
# ],
|
||||
# kwargs=[
|
||||
# Keyword(
|
||||
# key='packages',
|
||||
# value=Call(node=Getattr(node=Name(name='local_utils', ctx='load'),
|
||||
# attr='_get_utils_namespaces', ctx='load'), args=[], kwargs=[],
|
||||
# dyn_args=None, dyn_kwargs=None)
|
||||
# )
|
||||
# ],
|
||||
# dyn_args=None,
|
||||
# dyn_kwargs=None
|
||||
# )
|
||||
def statically_parse_adapter_dispatch(func_call, ctx, db_wrapper):
|
||||
def statically_parse_adapter_dispatch(
|
||||
func_call, ctx: Dict[str, Any], db_wrapper: Optional["ParseDatabaseWrapper"]
|
||||
) -> List[str]:
|
||||
possible_macro_calls = []
|
||||
# This captures an adapter.dispatch('<macro_name>') call.
|
||||
|
||||
@@ -142,7 +142,7 @@ def statically_parse_adapter_dispatch(func_call, ctx, db_wrapper):
|
||||
|
||||
if db_wrapper:
|
||||
macro = db_wrapper.dispatch(func_name, macro_namespace=macro_namespace).macro
|
||||
func_name = f"{macro.package_name}.{macro.name}"
|
||||
func_name = f"{macro.package_name}.{macro.name}" # type: ignore[attr-defined]
|
||||
possible_macro_calls.append(func_name)
|
||||
else: # this is only for tests/unit/test_macro_calls.py
|
||||
if macro_namespace:
|
||||
@@ -153,3 +153,93 @@ def statically_parse_adapter_dispatch(func_call, ctx, db_wrapper):
|
||||
possible_macro_calls.append(f"{package_name}.{func_name}")
|
||||
|
||||
return possible_macro_calls
|
||||
|
||||
|
||||
def statically_parse_ref_or_source(expression: str) -> Union[RefArgs, List[str]]:
|
||||
"""
|
||||
Returns a RefArgs or List[str] object, corresponding to ref or source respectively, given an input jinja expression.
|
||||
|
||||
input: str representing how the input node is referenced in the tested model's SQL
|
||||
* examples:
|
||||
- "ref('my_model_a')"
|
||||
- "ref('my_model_a', version=3)"
|
||||
- "ref('package', 'my_model_a', version=3)"
|
||||
- "source('my_source_schema', 'my_source_name')"
|
||||
|
||||
If input is not a well-formed jinja ref or source expression, a ParsingError is raised.
|
||||
"""
|
||||
ref_or_source: Union[RefArgs, List[str]]
|
||||
|
||||
try:
|
||||
statically_parsed = py_extract_from_source(f"{{{{ {expression} }}}}")
|
||||
except ExtractionError:
|
||||
raise ParsingError(f"Invalid jinja expression: {expression}")
|
||||
|
||||
if statically_parsed.get("refs"):
|
||||
raw_ref = list(statically_parsed["refs"])[0]
|
||||
ref_or_source = RefArgs(
|
||||
package=raw_ref.get("package"),
|
||||
name=raw_ref.get("name"),
|
||||
version=raw_ref.get("version"),
|
||||
)
|
||||
elif statically_parsed.get("sources"):
|
||||
source_name, source_table_name = list(statically_parsed["sources"])[0]
|
||||
ref_or_source = [source_name, source_table_name]
|
||||
else:
|
||||
raise ParsingError(f"Invalid ref or source expression: {expression}")
|
||||
|
||||
return ref_or_source
|
||||
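Assuming dbt-core is installed, the helper above can be exercised like this (module path inferred from the import of `statically_parse_unrendered_config` seen later in this diff; the printed reprs are approximate):

```python
from dbt.clients.jinja_static import statically_parse_ref_or_source

print(statically_parse_ref_or_source("ref('my_model_a', version=3)"))
# -> roughly RefArgs(name='my_model_a', package=None, version=3)
print(statically_parse_ref_or_source("source('my_source_schema', 'my_source_name')"))
# -> ['my_source_schema', 'my_source_name']
```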
def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Given a string with jinja, extract an unrendered config call.
|
||||
If no config call is present, returns None.
|
||||
|
||||
For example, given:
|
||||
"{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
|
||||
returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"}
|
||||
|
||||
No config call:
|
||||
"select 1 as id"
|
||||
returns: None
|
||||
"""
|
||||
# Return early to avoid creating a jinja environment if there is no config call in the input string
|
||||
if "config(" not in string:
|
||||
return None
|
||||
|
||||
# set 'capture_macros' to capture undefined
|
||||
env = get_environment(None, capture_macros=True)
|
||||
|
||||
global _TESTING_MACRO_CACHE
|
||||
if test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE:
|
||||
parsed = _TESTING_MACRO_CACHE.get(string, None)
|
||||
func_calls = getattr(parsed, "_dbt_cached_calls")
|
||||
else:
|
||||
parsed = env.parse(string)
|
||||
func_calls = tuple(parsed.find_all(jinja2.nodes.Call))
|
||||
|
||||
config_func_calls = list(
|
||||
filter(
|
||||
lambda f: hasattr(f, "node") and hasattr(f.node, "name") and f.node.name == "config",
|
||||
func_calls,
|
||||
)
|
||||
)
|
||||
# There should only be one {{ config(...) }} call per input
|
||||
config_func_call = config_func_calls[0] if config_func_calls else None
|
||||
|
||||
if not config_func_call:
|
||||
return None
|
||||
|
||||
unrendered_config = {}
|
||||
for kwarg in config_func_call.kwargs:
|
||||
unrendered_config[kwarg.key] = construct_static_kwarg_value(kwarg)
|
||||
|
||||
return unrendered_config
|
||||
|
||||
|
||||
def construct_static_kwarg_value(kwarg) -> str:
|
||||
# Instead of trying to re-assemble complex kwarg value, simply stringify the value.
|
||||
# This is still useful to be able to detect changes in unrendered configs, even if it is
|
||||
# not an exact representation of the user input.
|
||||
return str(kwarg)
|
||||
|
||||
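Hedged usage sketch for the helper above (same import-path assumption as the previous example); per the docstring, values come back as stringified jinja `Keyword` nodes rather than rendered values.

```python
from dbt.clients.jinja_static import statically_parse_unrendered_config

sql = "{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
print(statically_parse_unrendered_config(sql))
# -> {'materialized': "Keyword(key='materialized', value=Call(...))"}  (abbreviated)
print(statically_parse_unrendered_config("select 1 as id"))
# -> None
```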
@@ -21,6 +21,7 @@ from dbt.contracts.graph.nodes import (
|
||||
InjectedCTE,
|
||||
ManifestNode,
|
||||
ManifestSQLNode,
|
||||
ModelNode,
|
||||
SeedNode,
|
||||
UnitTestDefinition,
|
||||
UnitTestNode,
|
||||
@@ -29,12 +30,15 @@ from dbt.events.types import FoundStats, WritingInjectedSQLForNode
|
||||
from dbt.exceptions import (
|
||||
DbtInternalError,
|
||||
DbtRuntimeError,
|
||||
ForeignKeyConstraintToSyntaxError,
|
||||
GraphDependencyNotFoundError,
|
||||
ParsingError,
|
||||
)
|
||||
from dbt.flags import get_flags
|
||||
from dbt.graph import Graph
|
||||
from dbt.node_types import ModelLanguage, NodeType
|
||||
from dbt_common.clients.system import make_directory
|
||||
from dbt_common.contracts.constraints import ConstraintType
|
||||
from dbt_common.events.contextvars import get_node_info
|
||||
from dbt_common.events.format import pluralize
|
||||
from dbt_common.events.functions import fire_event
|
||||
@@ -371,7 +375,7 @@ class Compiler:
|
||||
|
||||
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
|
||||
|
||||
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
|
||||
new_cte_name = self.add_ephemeral_prefix(cte_model.identifier)
|
||||
rendered_sql = cte_model._pre_injected_sql or cte_model.compiled_code
|
||||
sql = f" {new_cte_name} as (\n{rendered_sql}\n)"
|
||||
|
||||
@@ -437,8 +441,31 @@ class Compiler:
|
||||
relation_name = str(relation_cls.create_from(self.config, node))
|
||||
node.relation_name = relation_name
|
||||
|
||||
# Compile 'ref' and 'source' expressions in foreign key constraints
|
||||
if isinstance(node, ModelNode):
|
||||
for constraint in node.all_constraints:
|
||||
if constraint.type == ConstraintType.foreign_key and constraint.to:
|
||||
constraint.to = self._compile_relation_for_foreign_key_constraint_to(
|
||||
manifest, node, constraint.to
|
||||
)
|
||||
|
||||
return node
|
||||
|
||||
def _compile_relation_for_foreign_key_constraint_to(
|
||||
self, manifest: Manifest, node: ManifestSQLNode, to_expression: str
|
||||
) -> str:
|
||||
try:
|
||||
foreign_key_node = manifest.find_node_from_ref_or_source(to_expression)
|
||||
except ParsingError:
|
||||
raise ForeignKeyConstraintToSyntaxError(node, to_expression)
|
||||
|
||||
if not foreign_key_node:
|
||||
raise GraphDependencyNotFoundError(node, to_expression)
|
||||
|
||||
adapter = get_adapter(self.config)
|
||||
relation_name = str(adapter.Relation.create_from(self.config, foreign_key_node))
|
||||
return relation_name
|
||||
|
||||
# This method doesn't actually "compile" any of the nodes. That is done by the
|
||||
# "compile_node" method. This creates a Linker and builds the networkx graph,
|
||||
# writes out the graph.gpickle file, and prints the stats, returning a Graph object.
|
||||
@@ -494,7 +521,9 @@ class Compiler:
|
||||
linker.write_graph(graph_path, manifest)
|
||||
|
||||
# writes the "compiled_code" into the target/compiled directory
|
||||
def _write_node(self, node: ManifestSQLNode) -> ManifestSQLNode:
|
||||
def _write_node(
|
||||
self, node: ManifestSQLNode, split_suffix: Optional[str] = None
|
||||
) -> ManifestSQLNode:
|
||||
if not node.extra_ctes_injected or node.resource_type in (
|
||||
NodeType.Snapshot,
|
||||
NodeType.Seed,
|
||||
@@ -503,7 +532,9 @@ class Compiler:
|
||||
fire_event(WritingInjectedSQLForNode(node_info=get_node_info()))
|
||||
|
||||
if node.compiled_code:
|
||||
node.compiled_path = node.get_target_write_path(self.config.target_path, "compiled")
|
||||
node.compiled_path = node.get_target_write_path(
|
||||
self.config.target_path, "compiled", split_suffix
|
||||
)
|
||||
node.write_node(self.config.project_root, node.compiled_path, node.compiled_code)
|
||||
return node
|
||||
|
||||
@@ -513,6 +544,7 @@ class Compiler:
|
||||
manifest: Manifest,
|
||||
extra_context: Optional[Dict[str, Any]] = None,
|
||||
write: bool = True,
|
||||
split_suffix: Optional[str] = None,
|
||||
) -> ManifestSQLNode:
|
||||
"""This is the main entry point into this code. It's called by
|
||||
CompileRunner.compile, GenericRPCRunner.compile, and
|
||||
@@ -520,6 +552,8 @@ class Compiler:
|
||||
the node's raw_code into compiled_code, and then calls the
|
||||
recursive method to "prepend" the ctes.
|
||||
"""
|
||||
# REVIEW: UnitTestDefinition shouldn't be possible here because of the
|
||||
# type of node, and it is likewise an invalid return type.
|
||||
if isinstance(node, UnitTestDefinition):
|
||||
return node
|
||||
|
||||
@@ -533,7 +567,7 @@ class Compiler:
|
||||
|
||||
node, _ = self._recursively_prepend_ctes(node, manifest, extra_context)
|
||||
if write:
|
||||
self._write_node(node)
|
||||
self._write_node(node, split_suffix=split_suffix)
|
||||
return node
|
||||
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from dbt import deprecations
|
||||
from dbt.adapters.contracts.connection import QueryComment
|
||||
from dbt.clients.yaml_helper import load_yaml_text
|
||||
from dbt.config.selectors import SelectorDict
|
||||
from dbt.config.utils import exclusive_primary_alt_value_setting
|
||||
from dbt.config.utils import normalize_warn_error_options
|
||||
from dbt.constants import (
|
||||
DBT_PROJECT_FILE_NAME,
|
||||
DEPENDENCIES_FILE_NAME,
|
||||
@@ -158,14 +158,8 @@ def _parse_versions(versions: Union[List[str], str]) -> List[VersionSpecifier]:
|
||||
return [VersionSpecifier.from_version_string(v) for v in versions]
|
||||
|
||||
|
||||
def _all_source_paths(
|
||||
model_paths: List[str],
|
||||
seed_paths: List[str],
|
||||
snapshot_paths: List[str],
|
||||
analysis_paths: List[str],
|
||||
macro_paths: List[str],
|
||||
) -> List[str]:
|
||||
paths = chain(model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths)
|
||||
def _all_source_paths(*args: List[str]) -> List[str]:
|
||||
paths = chain(*args)
|
||||
# Strip trailing slashes since the path is the same even though the name is not
|
||||
stripped_paths = map(lambda s: s.rstrip("/"), paths)
|
||||
return list(set(stripped_paths))
|
||||
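The refactor above preserves the behaviour: chain every path list, strip trailing slashes, and de-duplicate. A standalone equivalent, for illustration:

```python
from itertools import chain
from typing import List


def all_source_paths(*groups: List[str]) -> List[str]:
    # Strip trailing slashes so "models/" and "models" collapse to one entry.
    return list({p.rstrip("/") for p in chain(*groups)})


print(sorted(all_source_paths(["models/"], ["seeds"], ["models", "snapshots"])))
# ['models', 'seeds', 'snapshots']
```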
@@ -409,7 +403,7 @@ class PartialProject(RenderComponents):
|
||||
snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ["snapshots"])
|
||||
|
||||
all_source_paths: List[str] = _all_source_paths(
|
||||
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths
|
||||
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths, test_paths
|
||||
)
|
||||
|
||||
docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
|
||||
@@ -480,6 +474,7 @@ class PartialProject(RenderComponents):
|
||||
rendered.selectors_dict["selectors"]
|
||||
)
|
||||
dbt_cloud = cfg.dbt_cloud
|
||||
flags: Dict[str, Any] = cfg.flags
|
||||
|
||||
project = Project(
|
||||
project_name=name,
|
||||
@@ -524,6 +519,7 @@ class PartialProject(RenderComponents):
|
||||
project_env_vars=project_env_vars,
|
||||
restrict_access=cfg.restrict_access,
|
||||
dbt_cloud=dbt_cloud,
|
||||
flags=flags,
|
||||
)
|
||||
# sanity check - this means an internal issue
|
||||
project.validate()
|
||||
@@ -568,11 +564,6 @@ class PartialProject(RenderComponents):
|
||||
) = package_and_project_data_from_root(project_root)
|
||||
selectors_dict = selector_data_from_root(project_root)
|
||||
|
||||
if "flags" in project_dict:
|
||||
# We don't want to include "flags" in the Project,
|
||||
# it goes in ProjectFlags
|
||||
project_dict.pop("flags")
|
||||
|
||||
return cls.from_dicts(
|
||||
project_root=project_root,
|
||||
project_dict=project_dict,
|
||||
@@ -645,6 +636,7 @@ class Project:
|
||||
project_env_vars: Dict[str, Any]
|
||||
restrict_access: bool
|
||||
dbt_cloud: Dict[str, Any]
|
||||
flags: Dict[str, Any]
|
||||
|
||||
@property
|
||||
def all_source_paths(self) -> List[str]:
|
||||
@@ -654,6 +646,7 @@ class Project:
|
||||
self.snapshot_paths,
|
||||
self.analysis_paths,
|
||||
self.macro_paths,
|
||||
self.test_paths,
|
||||
)
|
||||
|
||||
@property
|
||||
@@ -724,6 +717,7 @@ class Project:
|
||||
"require-dbt-version": [v.to_version_string() for v in self.dbt_version],
|
||||
"restrict-access": self.restrict_access,
|
||||
"dbt-cloud": self.dbt_cloud,
|
||||
"flags": self.flags,
|
||||
}
|
||||
)
|
||||
if self.query_comment:
|
||||
@@ -821,20 +815,15 @@ def read_project_flags(project_dir: str, profiles_dir: str) -> ProjectFlags:
|
||||
|
||||
if profile_project_flags:
|
||||
# This can't use WARN_ERROR or WARN_ERROR_OPTIONS because they're in
|
||||
# the config that we're loading. Uses special "warn" method.
|
||||
deprecations.warn("project-flags-moved")
|
||||
# the config that we're loading. Uses special "buffer" method and fired after flags are initialized in preflight.
|
||||
deprecations.buffer("project-flags-moved")
|
||||
project_flags = profile_project_flags
|
||||
|
||||
if project_flags is not None:
|
||||
# handle collapsing `include` and `error` as well as collapsing `exclude` and `warn`
|
||||
# for warn_error_options
|
||||
warn_error_options = project_flags.get("warn_error_options")
|
||||
exclusive_primary_alt_value_setting(
|
||||
warn_error_options, "include", "error", "warn_error_options"
|
||||
)
|
||||
exclusive_primary_alt_value_setting(
|
||||
warn_error_options, "exclude", "warn", "warn_error_options"
|
||||
)
|
||||
warn_error_options = project_flags.get("warn_error_options", {})
|
||||
normalize_warn_error_options(warn_error_options)
|
||||
|
||||
ProjectFlags.validate(project_flags)
|
||||
return ProjectFlags.from_dict(project_flags)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import itertools
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
@@ -15,6 +16,8 @@ from typing import (
|
||||
Type,
|
||||
)
|
||||
|
||||
import pytz
|
||||
|
||||
from dbt import tracking
|
||||
from dbt.adapters.contracts.connection import (
|
||||
AdapterRequiredConfig,
|
||||
@@ -98,6 +101,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
profile_name: str
|
||||
cli_vars: Dict[str, Any]
|
||||
dependencies: Optional[Mapping[str, "RuntimeConfig"]] = None
|
||||
invoked_at: datetime = field(default_factory=lambda: datetime.now(pytz.UTC))
|
||||
|
||||
def __post_init__(self):
|
||||
self.validate()
|
||||
@@ -193,6 +197,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
log_cache_events=log_cache_events,
|
||||
dependencies=dependencies,
|
||||
dbt_cloud=project.dbt_cloud,
|
||||
flags=project.flags,
|
||||
)
|
||||
|
||||
# Called by 'load_projects' in this class
|
||||
@@ -290,9 +295,9 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
project_name=self.project_name,
|
||||
project_id=self.hashed_name(),
|
||||
user_id=tracking.active_user.id if tracking.active_user else None,
|
||||
send_anonymous_usage_stats=get_flags().SEND_ANONYMOUS_USAGE_STATS
|
||||
if tracking.active_user
|
||||
else None,
|
||||
send_anonymous_usage_stats=(
|
||||
get_flags().SEND_ANONYMOUS_USAGE_STATS if tracking.active_user else None
|
||||
),
|
||||
adapter_type=self.credentials.type,
|
||||
)
|
||||
|
||||
|
||||
@@ -49,5 +49,18 @@ def exclusive_primary_alt_value_setting(
|
||||
f"Only `{alt}` or `{primary}` can be specified{where}, not both"
|
||||
)
|
||||
|
||||
if alt_options:
|
||||
dictionary[primary] = alt_options
|
||||
if alt in dictionary:
|
||||
alt_value = dictionary.pop(alt)
|
||||
dictionary[primary] = alt_value
|
||||
|
||||
|
||||
def normalize_warn_error_options(warn_error_options: Dict[str, Any]) -> None:
|
||||
exclusive_primary_alt_value_setting(
|
||||
warn_error_options, "include", "error", "warn_error_options"
|
||||
)
|
||||
exclusive_primary_alt_value_setting(
|
||||
warn_error_options, "exclude", "warn", "warn_error_options"
|
||||
)
|
||||
for key in ("include", "exclude", "silence"):
|
||||
if key in warn_error_options and warn_error_options[key] is None:
|
||||
warn_error_options[key] = []
|
||||
|
||||
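A simplified re-implementation of the normalization above, for illustration: the `error`/`warn` aliases collapse into `include`/`exclude`, and explicit nulls become empty lists so downstream code can treat the values as lists.

```python
from typing import Any, Dict


def normalize(opts: Dict[str, Any]) -> None:
    for primary, alt in (("include", "error"), ("exclude", "warn")):
        if alt in opts:
            if primary in opts:
                raise ValueError(f"Only `{alt}` or `{primary}` can be specified, not both")
            opts[primary] = opts.pop(alt)
    for key in ("include", "exclude", "silence"):
        if key in opts and opts[key] is None:
            opts[key] = []


opts = {"error": ["Deprecation"], "exclude": None}
normalize(opts)
print(opts)  # {'exclude': [], 'include': ['Deprecation']}
```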
@@ -1,3 +1,5 @@
|
||||
from dbt_semantic_interfaces.type_enums import TimeGranularity
|
||||
|
||||
DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER"
|
||||
|
||||
SECRET_PLACEHOLDER = "$$$DBT_SECRET_START$$${}$$$DBT_SECRET_END$$$"
|
||||
@@ -15,5 +17,8 @@ DEPENDENCIES_FILE_NAME = "dependencies.yml"
|
||||
PACKAGE_LOCK_FILE_NAME = "package-lock.yml"
|
||||
MANIFEST_FILE_NAME = "manifest.json"
|
||||
SEMANTIC_MANIFEST_FILE_NAME = "semantic_manifest.json"
|
||||
LEGACY_TIME_SPINE_MODEL_NAME = "metricflow_time_spine"
|
||||
LEGACY_TIME_SPINE_GRANULARITY = TimeGranularity.DAY
|
||||
MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY = TimeGranularity.DAY
|
||||
PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack"
|
||||
PACKAGE_LOCK_HASH_KEY = "sha1_hash"
|
||||
|
||||
@@ -6,9 +6,12 @@ from typing import Any, Dict, Generic, Iterator, List, Optional, TypeVar
|
||||
from dbt.adapters.factory import get_config_class_by_name
|
||||
from dbt.config import IsFQNResource, Project, RuntimeConfig
|
||||
from dbt.contracts.graph.model_config import get_config_for
|
||||
from dbt.exceptions import SchemaConfigError
|
||||
from dbt.flags import get_flags
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.utils import fqn_search
|
||||
from dbt_common.contracts.config.base import BaseConfig, _listify
|
||||
from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
|
||||
from dbt_common.dataclass_schema import ValidationError
|
||||
from dbt_common.exceptions import DbtInternalError
|
||||
|
||||
|
||||
@@ -27,8 +30,7 @@ class ConfigSource:
|
||||
def __init__(self, project):
|
||||
self.project = project
|
||||
|
||||
def get_config_dict(self, resource_type: NodeType):
|
||||
...
|
||||
def get_config_dict(self, resource_type: NodeType): ...
|
||||
|
||||
|
||||
class UnrenderedConfig(ConfigSource):
|
||||
@@ -130,12 +132,12 @@ class BaseContextConfigGenerator(Generic[T]):
|
||||
return self._project_configs(self._active_project, fqn, resource_type)
|
||||
|
||||
@abstractmethod
|
||||
def _update_from_config(self, result: T, partial: Dict[str, Any], validate: bool = False) -> T:
|
||||
...
|
||||
def _update_from_config(
|
||||
self, result: T, partial: Dict[str, Any], validate: bool = False
|
||||
) -> T: ...
|
||||
|
||||
@abstractmethod
|
||||
def initial_result(self, resource_type: NodeType, base: bool) -> T:
|
||||
...
|
||||
def initial_result(self, resource_type: NodeType, base: bool) -> T: ...
|
||||
|
||||
def calculate_node_config(
|
||||
self,
|
||||
@@ -181,8 +183,7 @@ class BaseContextConfigGenerator(Generic[T]):
|
||||
project_name: str,
|
||||
base: bool,
|
||||
patch_config_dict: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
...
|
||||
) -> Dict[str, Any]: ...
|
||||
|
||||
|
||||
class ContextConfigGenerator(BaseContextConfigGenerator[C]):
|
||||
@@ -238,8 +239,12 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
|
||||
base=base,
|
||||
patch_config_dict=patch_config_dict,
|
||||
)
|
||||
finalized = config.finalize_and_validate()
|
||||
return finalized.to_dict(omit_none=True)
|
||||
try:
|
||||
finalized = config.finalize_and_validate()
|
||||
return finalized.to_dict(omit_none=True)
|
||||
except ValidationError as exc:
|
||||
# we got a ValidationError - probably bad types in config()
|
||||
raise SchemaConfigError(exc, node=config) from exc
|
||||
|
||||
|
||||
class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]):
|
||||
@@ -288,6 +293,7 @@ class ContextConfig:
|
||||
project_name: str,
|
||||
) -> None:
|
||||
self._config_call_dict: Dict[str, Any] = {}
|
||||
self._unrendered_config_call_dict: Dict[str, Any] = {}
|
||||
self._active_project = active_project
|
||||
self._fqn = fqn
|
||||
self._resource_type = resource_type
|
||||
@@ -295,55 +301,11 @@ class ContextConfig:
|
||||
|
||||
def add_config_call(self, opts: Dict[str, Any]) -> None:
|
||||
dct = self._config_call_dict
|
||||
self._add_config_call(dct, opts)
|
||||
merge_config_dicts(dct, opts)
|
||||
|
||||
@classmethod
|
||||
def _add_config_call(cls, config_call_dict, opts: Dict[str, Any]) -> None:
|
||||
# config_call_dict is already encountered configs, opts is new
|
||||
# This mirrors code in _merge_field_value in model_config.py which is similar but
|
||||
# operates on config objects.
|
||||
for k, v in opts.items():
|
||||
# MergeBehavior for post-hook and pre-hook is to collect all
|
||||
# values, instead of overwriting
|
||||
if k in BaseConfig.mergebehavior["append"]:
|
||||
if not isinstance(v, list):
|
||||
v = [v]
|
||||
if k in config_call_dict: # should always be a list here
|
||||
config_call_dict[k].extend(v)
|
||||
else:
|
||||
config_call_dict[k] = v
|
||||
|
||||
elif k in BaseConfig.mergebehavior["update"]:
|
||||
if not isinstance(v, dict):
|
||||
raise DbtInternalError(f"expected dict, got {v}")
|
||||
if k in config_call_dict and isinstance(config_call_dict[k], dict):
|
||||
config_call_dict[k].update(v)
|
||||
else:
|
||||
config_call_dict[k] = v
|
||||
elif k in BaseConfig.mergebehavior["dict_key_append"]:
|
||||
if not isinstance(v, dict):
|
||||
raise DbtInternalError(f"expected dict, got {v}")
|
||||
if k in config_call_dict: # should always be a dict
|
||||
for key, value in v.items():
|
||||
extend = False
|
||||
# This might start with a +, to indicate we should extend the list
|
||||
# instead of just clobbering it
|
||||
if key.startswith("+"):
|
||||
extend = True
|
||||
if key in config_call_dict[k] and extend:
|
||||
# extend the list
|
||||
config_call_dict[k][key].extend(_listify(value))
|
||||
else:
|
||||
# clobber the list
|
||||
config_call_dict[k][key] = _listify(value)
|
||||
else:
|
||||
# This is always a dictionary
|
||||
config_call_dict[k] = v
|
||||
# listify everything
|
||||
for key, value in config_call_dict[k].items():
|
||||
config_call_dict[k][key] = _listify(value)
|
||||
else:
|
||||
config_call_dict[k] = v
|
||||
def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None:
|
||||
# Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types.
|
||||
self._unrendered_config_call_dict.update(opts)
|
||||
|
||||
def build_config_dict(
|
||||
self,
|
||||
@@ -355,12 +317,24 @@ class ContextConfig:
|
||||
if rendered:
|
||||
# TODO CT-211
|
||||
src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated]
|
||||
config_call_dict = self._config_call_dict
|
||||
else:
|
||||
# TODO CT-211
|
||||
src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment]
|
||||
|
||||
# preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict
|
||||
if get_flags().state_modified_compare_more_unrendered_values is False:
|
||||
config_call_dict = self._config_call_dict
|
||||
else:
|
||||
# Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not,
|
||||
# as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc)
|
||||
if self._config_call_dict and not self._unrendered_config_call_dict:
|
||||
config_call_dict = self._config_call_dict
|
||||
else:
|
||||
config_call_dict = self._unrendered_config_call_dict
|
||||
|
||||
return src.calculate_node_config_dict(
|
||||
config_call_dict=self._config_call_dict,
|
||||
config_call_dict=config_call_dict,
|
||||
fqn=self._fqn,
|
||||
resource_type=self._resource_type,
|
||||
project_name=self._project_name,
|
||||
|
||||
@@ -8,7 +8,7 @@ from dbt.adapters.exceptions import (
|
||||
RelationWrongTypeError,
|
||||
)
|
||||
from dbt.adapters.exceptions.cache import CacheInconsistencyError
|
||||
from dbt.events.types import JinjaLogWarning
|
||||
from dbt.events.types import JinjaLogWarning, SnapshotTimestampWarning
|
||||
from dbt.exceptions import (
|
||||
AmbiguousAliasError,
|
||||
AmbiguousCatalogMatchError,
|
||||
@@ -116,6 +116,17 @@ def raise_fail_fast_error(msg, node=None) -> NoReturn:
|
||||
raise FailFastError(msg, node=node)
|
||||
|
||||
|
||||
def warn_snapshot_timestamp_data_types(
|
||||
snapshot_time_data_type: str, updated_at_data_type: str
|
||||
) -> None:
|
||||
warn_or_error(
|
||||
SnapshotTimestampWarning(
|
||||
snapshot_time_data_type=snapshot_time_data_type,
|
||||
updated_at_data_type=updated_at_data_type,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# Update this when a new function should be added to the
|
||||
# dbt context's `exceptions` key!
|
||||
CONTEXT_EXPORTS = {
|
||||
@@ -141,6 +152,7 @@ CONTEXT_EXPORTS = {
|
||||
raise_contract_error,
|
||||
column_type_missing,
|
||||
raise_fail_fast_error,
|
||||
warn_snapshot_timestamp_data_types,
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ from typing_extensions import Protocol
|
||||
|
||||
from dbt import selected_resources
|
||||
from dbt.adapters.base.column import Column
|
||||
from dbt.adapters.base.relation import EventTimeFilter
|
||||
from dbt.adapters.contracts.connection import AdapterResponse
|
||||
from dbt.adapters.exceptions import MissingConfigError
|
||||
from dbt.adapters.factory import (
|
||||
@@ -27,13 +28,14 @@ from dbt.adapters.factory import (
|
||||
get_adapter_package_names,
|
||||
get_adapter_type_names,
|
||||
)
|
||||
from dbt.artifacts.resources import NodeVersion, RefArgs
|
||||
from dbt.artifacts.resources import NodeConfig, NodeVersion, RefArgs, SourceConfig
|
||||
from dbt.clients.jinja import (
|
||||
MacroGenerator,
|
||||
MacroStack,
|
||||
UnitTestMacroGenerator,
|
||||
get_rendered,
|
||||
)
|
||||
from dbt.clients.jinja_static import statically_parse_unrendered_config
|
||||
from dbt.config import IsFQNResource, Project, RuntimeConfig
|
||||
from dbt.constants import DEFAULT_ENV_PLACEHOLDER
|
||||
from dbt.context.base import Var, contextmember, contextproperty
|
||||
@@ -50,6 +52,7 @@ from dbt.contracts.graph.nodes import (
|
||||
Exposure,
|
||||
Macro,
|
||||
ManifestNode,
|
||||
ModelNode,
|
||||
Resource,
|
||||
SeedNode,
|
||||
SemanticModel,
|
||||
@@ -76,6 +79,8 @@ from dbt.exceptions import (
|
||||
SecretEnvVarLocationError,
|
||||
TargetNotFoundError,
|
||||
)
|
||||
from dbt.flags import get_flags
|
||||
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
|
||||
from dbt.node_types import ModelLanguage, NodeType
|
||||
from dbt.utils import MultiDict, args_to_dict
|
||||
from dbt_common.clients.jinja import MacroProtocol
|
||||
@@ -230,6 +235,29 @@ class BaseResolver(metaclass=abc.ABCMeta):
|
||||
def resolve_limit(self) -> Optional[int]:
|
||||
return 0 if getattr(self.config.args, "EMPTY", False) else None
|
||||
|
||||
def resolve_event_time_filter(self, target: ManifestNode) -> Optional[EventTimeFilter]:
|
||||
event_time_filter = None
|
||||
if (
|
||||
(isinstance(target.config, NodeConfig) or isinstance(target.config, SourceConfig))
|
||||
and target.config.event_time
|
||||
and isinstance(self.model, ModelNode)
|
||||
and self.model.config.materialized == "incremental"
|
||||
and self.model.config.incremental_strategy == "microbatch"
|
||||
and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
|
||||
and self.model.batch is not None
|
||||
):
|
||||
start = self.model.batch.event_time_start
|
||||
end = self.model.batch.event_time_end
|
||||
|
||||
if start is not None or end is not None:
|
||||
event_time_filter = EventTimeFilter(
|
||||
field_name=target.config.event_time,
|
||||
start=start,
|
||||
end=end,
|
||||
)
|
||||
|
||||
return event_time_filter
|
||||
|
||||
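A condensed restatement of the guard above: an event-time filter is only produced when the upstream relation declares an `event_time` column and the consuming model is an incremental microbatch model with a current batch window. The names below are simplified, not the real resolver signature.

```python
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Tuple


@dataclass
class EventTimeFilter:
    field_name: str
    start: Optional[datetime] = None
    end: Optional[datetime] = None


def maybe_event_time_filter(
    target_event_time: Optional[str],
    incremental_strategy: Optional[str],
    batch: Optional[Tuple[Optional[datetime], Optional[datetime]]],
) -> Optional[EventTimeFilter]:
    if not (target_event_time and incremental_strategy == "microbatch" and batch):
        return None
    start, end = batch
    if start is None and end is None:
        return None
    return EventTimeFilter(field_name=target_event_time, start=start, end=end)


print(maybe_event_time_filter("loaded_at", "microbatch", (datetime(2024, 1, 1), datetime(2024, 1, 2))))
```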
@abc.abstractmethod
|
||||
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
|
||||
pass
|
||||
@@ -239,8 +267,7 @@ class BaseRefResolver(BaseResolver):
|
||||
@abc.abstractmethod
|
||||
def resolve(
|
||||
self, name: str, package: Optional[str] = None, version: Optional[NodeVersion] = None
|
||||
) -> RelationProxy:
|
||||
...
|
||||
) -> RelationProxy: ...
|
||||
|
||||
def _repack_args(
|
||||
self, name: str, package: Optional[str], version: Optional[NodeVersion]
|
||||
@@ -306,8 +333,7 @@ class BaseSourceResolver(BaseResolver):
|
||||
|
||||
class BaseMetricResolver(BaseResolver):
|
||||
@abc.abstractmethod
|
||||
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
|
||||
...
|
||||
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference: ...
|
||||
|
||||
def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
|
||||
if package is None:
|
||||
@@ -341,8 +367,7 @@ class BaseMetricResolver(BaseResolver):
|
||||
|
||||
|
||||
class Config(Protocol):
|
||||
def __init__(self, model, context_config: Optional[ContextConfig]):
|
||||
...
|
||||
def __init__(self, model, context_config: Optional[ContextConfig]): ...
|
||||
|
||||
|
||||
# Implementation of "config(..)" calls in models
|
||||
@@ -374,6 +399,14 @@ class ParseConfigObject(Config):
|
||||
# not call it!
|
||||
if self.context_config is None:
|
||||
raise DbtRuntimeError("At parse time, did not receive a context config")
|
||||
|
||||
# Track unrendered opts to build parsed node unrendered_config later on
|
||||
if get_flags().state_modified_compare_more_unrendered_values:
|
||||
unrendered_config = statically_parse_unrendered_config(self.model.raw_code)
|
||||
if unrendered_config:
|
||||
self.context_config.add_unrendered_config_call(unrendered_config)
|
||||
|
||||
# Use rendered opts to populate context_config
|
||||
self.context_config.add_config_call(opts)
|
||||
return ""
|
||||
|
||||
@@ -548,7 +581,11 @@ class RuntimeRefResolver(BaseRefResolver):
|
||||
def create_relation(self, target_model: ManifestNode) -> RelationProxy:
|
||||
if target_model.is_ephemeral_model:
|
||||
self.model.set_cte(target_model.unique_id, None)
|
||||
return self.Relation.create_ephemeral_from(target_model, limit=self.resolve_limit)
|
||||
return self.Relation.create_ephemeral_from(
|
||||
target_model,
|
||||
limit=self.resolve_limit,
|
||||
event_time_filter=self.resolve_event_time_filter(target_model),
|
||||
)
|
||||
elif (
|
||||
hasattr(target_model, "defer_relation")
|
||||
and target_model.defer_relation
|
||||
@@ -566,10 +603,18 @@ class RuntimeRefResolver(BaseRefResolver):
|
||||
)
|
||||
):
|
||||
return self.Relation.create_from(
|
||||
self.config, target_model.defer_relation, limit=self.resolve_limit
|
||||
self.config,
|
||||
target_model.defer_relation,
|
||||
limit=self.resolve_limit,
|
||||
event_time_filter=self.resolve_event_time_filter(target_model),
|
||||
)
|
||||
else:
|
||||
return self.Relation.create_from(self.config, target_model, limit=self.resolve_limit)
|
||||
return self.Relation.create_from(
|
||||
self.config,
|
||||
target_model,
|
||||
limit=self.resolve_limit,
|
||||
event_time_filter=self.resolve_event_time_filter(target_model),
|
||||
)
|
||||
|
||||
def validate(
|
||||
self,
|
||||
@@ -603,6 +648,11 @@ class OperationRefResolver(RuntimeRefResolver):
|
||||
|
||||
|
||||
class RuntimeUnitTestRefResolver(RuntimeRefResolver):
|
||||
@property
|
||||
def resolve_limit(self) -> Optional[int]:
|
||||
# Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
|
||||
return None
|
||||
|
||||
def resolve(
|
||||
self,
|
||||
target_name: str,
|
||||
@@ -636,10 +686,27 @@ class RuntimeSourceResolver(BaseSourceResolver):
|
||||
target_kind="source",
|
||||
disabled=(isinstance(target_source, Disabled)),
|
||||
)
|
||||
return self.Relation.create_from(self.config, target_source, limit=self.resolve_limit)
|
||||
|
||||
# Source quoting does _not_ respect global configs in dbt_project.yml, as documented here:
|
||||
# https://docs.getdbt.com/reference/project-configs/quoting
|
||||
# Use an object with an empty quoting field to bypass any settings in self.
|
||||
class SourceQuotingBaseConfig:
|
||||
quoting: Dict[str, Any] = {}
|
||||
|
||||
return self.Relation.create_from(
|
||||
SourceQuotingBaseConfig(),
|
||||
target_source,
|
||||
limit=self.resolve_limit,
|
||||
event_time_filter=self.resolve_event_time_filter(target_source),
|
||||
)
|
||||
|
||||
|
||||
class RuntimeUnitTestSourceResolver(BaseSourceResolver):
|
||||
@property
|
||||
def resolve_limit(self) -> Optional[int]:
|
||||
# Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
|
||||
return None
|
||||
|
||||
def resolve(self, source_name: str, table_name: str):
|
||||
target_source = self.manifest.resolve_source(
|
||||
source_name,
|
||||
@@ -936,7 +1003,20 @@ class ProviderContext(ManifestContext):
|
||||
# macros/source defs aren't 'writeable'.
|
||||
if isinstance(self.model, (Macro, SourceDefinition)):
|
||||
raise MacrosSourcesUnWriteableError(node=self.model)
|
||||
self.model.build_path = self.model.get_target_write_path(self.config.target_path, "run")
|
||||
|
||||
split_suffix = None
|
||||
if (
|
||||
isinstance(self.model, ModelNode)
|
||||
and self.model.config.get("incremental_strategy") == "microbatch"
|
||||
):
|
||||
split_suffix = MicrobatchBuilder.format_batch_start(
|
||||
self.model.config.get("__dbt_internal_microbatch_event_time_start"),
|
||||
self.model.config.batch_size,
|
||||
)
|
||||
|
||||
self.model.build_path = self.model.get_target_write_path(
|
||||
self.config.target_path, "run", split_suffix=split_suffix
|
||||
)
|
||||
self.model.write_node(self.config.project_root, self.model.build_path, payload)
|
||||
return ""
|
||||
|
||||
@@ -977,7 +1057,8 @@ class ProviderContext(ManifestContext):
|
||||
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
|
||||
except ValueError as e:
|
||||
raise LoadAgateTableValueError(e, node=self.model)
|
||||
table.original_abspath = os.path.abspath(path)
|
||||
# this is used by some adapters
|
||||
table.original_abspath = os.path.abspath(path) # type: ignore
|
||||
return table
|
||||
|
||||
@contextproperty()
|
||||
@@ -1599,7 +1680,7 @@ class UnitTestContext(ModelContext):
|
||||
if self.model.this_input_node_unique_id:
|
||||
this_node = self.manifest.expect(self.model.this_input_node_unique_id)
|
||||
self.model.set_cte(this_node.unique_id, None) # type: ignore
|
||||
return self.adapter.Relation.add_ephemeral_prefix(this_node.name)
|
||||
return self.adapter.Relation.add_ephemeral_prefix(this_node.identifier) # type: ignore
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@

## Artifacts

### Generating JSON schemas
A helper script, `sripts/collect-artifact-schema.py` is available to generate json schemas corresponding to versioned artifacts (`ArtifactMixin`s).
A helper script, `scripts/collect-artifact-schema.py` is available to generate json schemas corresponding to versioned artifacts (`ArtifactMixin`s).

This script must be run when a new artifact schema version is created or when changes are made to existing artifact versions; it writes the JSON schema to `schema/dbt/<artifact>/v<version>.json`.
|
||||
|
||||
|
||||
@@ -192,8 +192,14 @@ class SchemaSourceFile(BaseSourceFile):
|
||||
sources: List[str] = field(default_factory=list)
|
||||
exposures: List[str] = field(default_factory=list)
|
||||
metrics: List[str] = field(default_factory=list)
|
||||
# metrics generated from semantic_model measures
|
||||
snapshots: List[str] = field(default_factory=list)
|
||||
# The following field will no longer be used. Leaving
|
||||
# here to avoid breaking existing projects. To be removed
|
||||
# later if possible.
|
||||
generated_metrics: List[str] = field(default_factory=list)
|
||||
# metrics generated from semantic_model measures. The key is
|
||||
# the name of the semantic_model, so that we can find it later.
|
||||
metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
|
||||
groups: List[str] = field(default_factory=list)
|
||||
# node patches contain models, seeds, snapshots, analyses
|
||||
ndp: List[str] = field(default_factory=list)
|
||||
@@ -207,6 +213,9 @@ class SchemaSourceFile(BaseSourceFile):
|
||||
# created too, but those are in 'sources'
|
||||
sop: List[SourceKey] = field(default_factory=list)
|
||||
env_vars: Dict[str, Any] = field(default_factory=dict)
|
||||
unrendered_configs: Dict[str, Any] = field(default_factory=dict)
|
||||
unrendered_databases: Dict[str, Any] = field(default_factory=dict)
|
||||
unrendered_schemas: Dict[str, Any] = field(default_factory=dict)
|
||||
pp_dict: Optional[Dict[str, Any]] = None
|
||||
pp_test_index: Optional[Dict[str, Any]] = None
|
||||
|
||||
@@ -259,6 +268,40 @@ class SchemaSourceFile(BaseSourceFile):
|
||||
return self.data_tests[yaml_key][name]
|
||||
return []
|
||||
|
||||
def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
|
||||
if self.generated_metrics:
|
||||
# Probably not needed, but for safety sake, convert the
|
||||
# old generated_metrics to metrics_from_measures.
|
||||
self.fix_metrics_from_measures()
|
||||
if semantic_model_name not in self.metrics_from_measures:
|
||||
self.metrics_from_measures[semantic_model_name] = []
|
||||
self.metrics_from_measures[semantic_model_name].append(metric_unique_id)
|
||||
|
||||
def fix_metrics_from_measures(self):
|
||||
# Temporary method to fix up existing projects with a partial parse file.
|
||||
# This should only be called if a SchemaSourceFile in a msgpack
# manifest has an existing "generated_metrics" list, to turn it
# into a "metrics_from_measures" dictionary, so that we can
|
||||
# correctly partially parse.
|
||||
# This code can be removed when "generated_metrics" is removed.
|
||||
generated_metrics = self.generated_metrics
|
||||
self.generated_metrics = [] # Should never be needed again
|
||||
# For each metric_unique_id we loop through the semantic models
|
||||
# looking for the name of the "measure" which generated the metric.
|
||||
# When it's found, add it to "metrics_from_measures", with a key
|
||||
# of the semantic_model name, and a list of metrics.
|
||||
for metric_unique_id in generated_metrics:
|
||||
parts = metric_unique_id.split(".")
|
||||
# get the metric_name
|
||||
metric_name = parts[-1]
|
||||
if "semantic_models" in self.dict_from_yaml:
|
||||
for sem_model in self.dict_from_yaml["semantic_models"]:
|
||||
if "measures" in sem_model:
|
||||
for measure in sem_model["measures"]:
|
||||
if measure["name"] == metric_name:
|
||||
self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
|
||||
break
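# Illustrative sketch (not part of the diff): shape of the conversion performed above,
# using hypothetical metric unique_ids and a hypothetical semantic model name.
generated_metrics = ["metric.my_project.total_revenue", "metric.my_project.order_count"]
metrics_from_measures = {
    "orders": ["metric.my_project.total_revenue", "metric.my_project.order_count"],
}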
|
||||
|
||||
def get_key_and_name_for_test(self, test_unique_id):
|
||||
yaml_key = None
|
||||
block_name = None
|
||||
@@ -278,6 +321,41 @@ class SchemaSourceFile(BaseSourceFile):
|
||||
test_ids.extend(self.data_tests[key][name])
|
||||
return test_ids
|
||||
|
||||
def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
|
||||
versioned_name = f"{name}_v{version}" if version is not None else name
|
||||
|
||||
if yaml_key not in self.unrendered_configs:
|
||||
self.unrendered_configs[yaml_key] = {}
|
||||
|
||||
if versioned_name not in self.unrendered_configs[yaml_key]:
|
||||
self.unrendered_configs[yaml_key][versioned_name] = unrendered_config
|
||||
|
||||
def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
|
||||
versioned_name = f"{name}_v{version}" if version is not None else name
|
||||
|
||||
if yaml_key not in self.unrendered_configs:
|
||||
return None
|
||||
if versioned_name not in self.unrendered_configs[yaml_key]:
|
||||
return None
|
||||
|
||||
return self.unrendered_configs[yaml_key][versioned_name]
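# Illustrative usage (not part of the diff), with hypothetical yaml_key/name/version
# values, showing that add/get key the config by yaml_key and by "<name>_v<version>":
#   schema_file.add_unrendered_config({"materialized": "view"}, "models", "fct_orders", version=2)
#   schema_file.get_unrendered_config("models", "fct_orders", version=2)
#   # -> {"materialized": "view"}, stored under unrendered_configs["models"]["fct_orders_v2"]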
|
||||
|
||||
def delete_from_unrendered_configs(self, yaml_key, name):
|
||||
# We delete all unrendered_configs for this yaml_key/name because the
|
||||
# entry has been scheduled for reparsing.
|
||||
if self.get_unrendered_config(yaml_key, name):
|
||||
del self.unrendered_configs[yaml_key][name]
|
||||
# Delete all versioned keys associated with name
|
||||
version_names_to_delete = []
|
||||
for potential_version_name in self.unrendered_configs[yaml_key]:
|
||||
if potential_version_name.startswith(f"{name}_v"):
|
||||
version_names_to_delete.append(potential_version_name)
|
||||
for version_name in version_names_to_delete:
|
||||
del self.unrendered_configs[yaml_key][version_name]
|
||||
|
||||
if not self.unrendered_configs[yaml_key]:
|
||||
del self.unrendered_configs[yaml_key]
|
||||
|
||||
def add_env_var(self, var, yaml_key, name):
|
||||
if yaml_key not in self.env_vars:
|
||||
self.env_vars[yaml_key] = {}
|
||||
@@ -294,6 +372,30 @@ class SchemaSourceFile(BaseSourceFile):
|
||||
if not self.env_vars[yaml_key]:
|
||||
del self.env_vars[yaml_key]
|
||||
|
||||
def add_unrendered_database(self, yaml_key: str, name: str, unrendered_database: str) -> None:
|
||||
if yaml_key not in self.unrendered_databases:
|
||||
self.unrendered_databases[yaml_key] = {}
|
||||
|
||||
self.unrendered_databases[yaml_key][name] = unrendered_database
|
||||
|
||||
def get_unrendered_database(self, yaml_key: str, name: str) -> Optional[str]:
|
||||
if yaml_key not in self.unrendered_databases:
|
||||
return None
|
||||
|
||||
return self.unrendered_databases[yaml_key].get(name)
|
||||
|
||||
def add_unrendered_schema(self, yaml_key: str, name: str, unrendered_schema: str) -> None:
|
||||
if yaml_key not in self.unrendered_schemas:
|
||||
self.unrendered_schemas[yaml_key] = {}
|
||||
|
||||
self.unrendered_schemas[yaml_key][name] = unrendered_schema
|
||||
|
||||
def get_unrendered_schema(self, yaml_key: str, name: str) -> Optional[str]:
|
||||
if yaml_key not in self.unrendered_schemas:
|
||||
return None
|
||||
|
||||
return self.unrendered_schemas[yaml_key].get(name)
|
||||
|
||||
|
||||
@dataclass
|
||||
class FixtureSourceFile(BaseSourceFile):
|
||||
|
||||
@@ -29,11 +29,13 @@ from dbt.adapters.exceptions import (
|
||||
DuplicateMacroInPackageError,
|
||||
DuplicateMaterializationNameError,
|
||||
)
|
||||
from dbt.adapters.factory import get_adapter_package_names
|
||||
|
||||
# to preserve import paths
|
||||
from dbt.artifacts.resources import BaseResource, DeferRelation, NodeVersion
|
||||
from dbt.artifacts.resources import BaseResource, DeferRelation, NodeVersion, RefArgs
|
||||
from dbt.artifacts.resources.v1.config import NodeConfig
|
||||
from dbt.artifacts.schemas.manifest import ManifestMetadata, UniqueID, WritableManifest
|
||||
from dbt.clients.jinja_static import statically_parse_ref_or_source
|
||||
from dbt.contracts.files import (
|
||||
AnySourceFile,
|
||||
FileHash,
|
||||
@@ -53,10 +55,11 @@ from dbt.contracts.graph.nodes import (
|
||||
ManifestNode,
|
||||
Metric,
|
||||
ModelNode,
|
||||
ResultNode,
|
||||
SavedQuery,
|
||||
SeedNode,
|
||||
SemanticModel,
|
||||
SingularTestNode,
|
||||
SnapshotNode,
|
||||
SourceDefinition,
|
||||
UnitTestDefinition,
|
||||
UnitTestFileFixture,
|
||||
@@ -64,7 +67,7 @@ from dbt.contracts.graph.nodes import (
|
||||
)
|
||||
from dbt.contracts.graph.unparsed import SourcePatch, UnparsedVersion
|
||||
from dbt.contracts.util import SourceKey
|
||||
from dbt.events.types import UnpinnedRefNewVersionAvailable
|
||||
from dbt.events.types import ArtifactWritten, UnpinnedRefNewVersionAvailable
|
||||
from dbt.exceptions import (
|
||||
AmbiguousResourceNameRefError,
|
||||
CompilationError,
|
||||
@@ -88,7 +91,7 @@ DocName = str
|
||||
RefName = str
|
||||
|
||||
|
||||
def find_unique_id_for_package(storage, key, package: Optional[PackageName]):
|
||||
def find_unique_id_for_package(storage, key, package: Optional[PackageName]) -> Optional[UniqueID]:
|
||||
if key not in storage:
|
||||
return None
|
||||
|
||||
@@ -412,11 +415,11 @@ class DisabledLookup(dbtClassMixin):
|
||||
self.storage: Dict[str, Dict[PackageName, List[Any]]] = {}
|
||||
self.populate(manifest)
|
||||
|
||||
def populate(self, manifest):
|
||||
def populate(self, manifest: "Manifest"):
|
||||
for node in list(chain.from_iterable(manifest.disabled.values())):
|
||||
self.add_node(node)
|
||||
|
||||
def add_node(self, node):
|
||||
def add_node(self, node: GraphMemberNode) -> None:
|
||||
if node.search_name not in self.storage:
|
||||
self.storage[node.search_name] = {}
|
||||
if node.package_name not in self.storage[node.search_name]:
|
||||
@@ -426,8 +429,12 @@ class DisabledLookup(dbtClassMixin):
|
||||
# This should return a list of disabled nodes. It's different from
|
||||
# the other Lookup functions in that it returns full nodes, not just unique_ids
|
||||
def find(
|
||||
self, search_name, package: Optional[PackageName], version: Optional[NodeVersion] = None
|
||||
):
|
||||
self,
|
||||
search_name,
|
||||
package: Optional[PackageName],
|
||||
version: Optional[NodeVersion] = None,
|
||||
resource_types: Optional[List[NodeType]] = None,
|
||||
) -> Optional[List[Any]]:
|
||||
if version:
|
||||
search_name = f"{search_name}.v{version}"
|
||||
|
||||
@@ -436,22 +443,72 @@ class DisabledLookup(dbtClassMixin):
|
||||
|
||||
pkg_dct: Mapping[PackageName, List[Any]] = self.storage[search_name]
|
||||
|
||||
nodes = []
|
||||
if package is None:
|
||||
if not pkg_dct:
|
||||
return None
|
||||
else:
|
||||
return next(iter(pkg_dct.values()))
|
||||
nodes = next(iter(pkg_dct.values()))
|
||||
elif package in pkg_dct:
|
||||
return pkg_dct[package]
|
||||
nodes = pkg_dct[package]
|
||||
else:
|
||||
return None
|
||||
|
||||
if resource_types is None:
|
||||
return nodes
|
||||
else:
|
||||
new_nodes = []
|
||||
for node in nodes:
|
||||
if node.resource_type in resource_types:
|
||||
new_nodes.append(node)
|
||||
if not new_nodes:
|
||||
return None
|
||||
else:
|
||||
return new_nodes
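# Illustrative usage (not part of the diff), with a hypothetical search name, showing the
# new resource_types filter restricting results to ref-able node types:
#   disabled = manifest.disabled_lookup.find(
#       "fct_orders", None, version=None, resource_types=REFABLE_NODE_TYPES
#   )
#   # -> list of matching disabled nodes, or None if nothing matches the filter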
|
||||
|
||||
|
||||
class AnalysisLookup(RefableLookup):
|
||||
_lookup_types: ClassVar[set] = set([NodeType.Analysis])
|
||||
_versioned_types: ClassVar[set] = set()
|
||||
|
||||
|
||||
class SingularTestLookup(dbtClassMixin):
|
||||
def __init__(self, manifest: "Manifest") -> None:
|
||||
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
|
||||
self.populate(manifest)
|
||||
|
||||
def get_unique_id(self, search_name, package: Optional[PackageName]) -> Optional[UniqueID]:
|
||||
return find_unique_id_for_package(self.storage, search_name, package)
|
||||
|
||||
def find(
|
||||
self, search_name, package: Optional[PackageName], manifest: "Manifest"
|
||||
) -> Optional[SingularTestNode]:
|
||||
unique_id = self.get_unique_id(search_name, package)
|
||||
if unique_id is not None:
|
||||
return self.perform_lookup(unique_id, manifest)
|
||||
return None
|
||||
|
||||
def add_singular_test(self, source: SingularTestNode) -> None:
|
||||
if source.search_name not in self.storage:
|
||||
self.storage[source.search_name] = {}
|
||||
|
||||
self.storage[source.search_name][source.package_name] = source.unique_id
|
||||
|
||||
def populate(self, manifest: "Manifest") -> None:
|
||||
for node in manifest.nodes.values():
|
||||
if isinstance(node, SingularTestNode):
|
||||
self.add_singular_test(node)
|
||||
|
||||
def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> SingularTestNode:
|
||||
if unique_id not in manifest.nodes:
|
||||
raise dbt_common.exceptions.DbtInternalError(
|
||||
f"Singular test {unique_id} found in cache but not found in manifest"
|
||||
)
|
||||
node = manifest.nodes[unique_id]
|
||||
assert isinstance(node, SingularTestNode)
|
||||
return node
|
||||
|
||||
|
||||
def _packages_to_search(
|
||||
current_project: str,
|
||||
node_package: str,
|
||||
@@ -657,10 +714,10 @@ class MacroMethods:
|
||||
self._macros_by_name = {}
|
||||
self._macros_by_package = {}
|
||||
|
||||
def find_macro_by_name(
|
||||
def find_macro_candidate_by_name(
|
||||
self, name: str, root_project_name: str, package: Optional[str]
|
||||
) -> Optional[Macro]:
|
||||
"""Find a macro in the graph by its name and package name, or None for
|
||||
) -> Optional[MacroCandidate]:
|
||||
"""Find a MacroCandidate in the graph by its name and package name, or None for
|
||||
any package. The root project name is used to determine priority:
|
||||
- locally defined macros come first
|
||||
- then imported macros
|
||||
@@ -678,7 +735,15 @@ class MacroMethods:
|
||||
filter=filter,
|
||||
)
|
||||
|
||||
return candidates.last()
|
||||
return candidates.last_candidate()
|
||||
|
||||
def find_macro_by_name(
|
||||
self, name: str, root_project_name: str, package: Optional[str]
|
||||
) -> Optional[Macro]:
|
||||
macro_candidate = self.find_macro_candidate_by_name(
|
||||
name=name, root_project_name=root_project_name, package=package
|
||||
)
|
||||
return macro_candidate.macro if macro_candidate else None
|
||||
|
||||
def find_generate_macro_by_name(
|
||||
self, component: str, root_project_name: str, imported_package: Optional[str] = None
|
||||
@@ -720,9 +785,6 @@ class MacroMethods:
|
||||
filter: Optional[Callable[[MacroCandidate], bool]] = None,
|
||||
) -> CandidateList:
|
||||
"""Find macros by their name."""
|
||||
# avoid an import cycle
|
||||
from dbt.adapters.factory import get_adapter_package_names
|
||||
|
||||
candidates: CandidateList = CandidateList()
|
||||
|
||||
macros_by_name = self.get_macros_by_name()
|
||||
@@ -854,6 +916,9 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
_analysis_lookup: Optional[AnalysisLookup] = field(
|
||||
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
|
||||
)
|
||||
_singular_test_lookup: Optional[SingularTestLookup] = field(
|
||||
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
|
||||
)
|
||||
_parsing_info: ParsingInfo = field(
|
||||
default_factory=ParsingInfo,
|
||||
metadata={"serialize": lambda x: None, "deserialize": lambda x: None},
|
||||
@@ -988,6 +1053,7 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
self.metrics.values(),
|
||||
self.semantic_models.values(),
|
||||
self.saved_queries.values(),
|
||||
self.unit_tests.values(),
|
||||
)
|
||||
for resource in all_resources:
|
||||
resource_type_plural = resource.resource_type.pluralize()
|
||||
@@ -1094,6 +1160,7 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
metrics=cls._map_resources_to_map_nodes(writable_manifest.metrics),
|
||||
groups=cls._map_resources_to_map_nodes(writable_manifest.groups),
|
||||
semantic_models=cls._map_resources_to_map_nodes(writable_manifest.semantic_models),
|
||||
saved_queries=cls._map_resources_to_map_nodes(writable_manifest.saved_queries),
|
||||
selectors={
|
||||
selector_id: selector
|
||||
for selector_id, selector in writable_manifest.selectors.items()
|
||||
@@ -1160,7 +1227,9 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
)
|
||||
|
||||
def write(self, path):
|
||||
self.writable_manifest().write(path)
|
||||
writable = self.writable_manifest()
|
||||
writable.write(path)
|
||||
fire_event(ArtifactWritten(artifact_type=writable.__class__.__name__, artifact_path=path))
|
||||
|
||||
# Called in dbt.compilation.Linker.write_graph and
|
||||
# dbt.graph.queue.get and ._include_in_cost
|
||||
@@ -1247,29 +1316,16 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
self._analysis_lookup = AnalysisLookup(self)
|
||||
return self._analysis_lookup
|
||||
|
||||
@property
|
||||
def singular_test_lookup(self) -> SingularTestLookup:
|
||||
if self._singular_test_lookup is None:
|
||||
self._singular_test_lookup = SingularTestLookup(self)
|
||||
return self._singular_test_lookup
|
||||
|
||||
@property
|
||||
def external_node_unique_ids(self):
|
||||
return [node.unique_id for node in self.nodes.values() if node.is_external_node]
|
||||
|
||||
def resolve_refs(
|
||||
self,
|
||||
source_node: ModelNode,
|
||||
current_project: str, # TODO: ModelNode is overly restrictive typing
|
||||
) -> List[MaybeNonSource]:
|
||||
resolved_refs: List[MaybeNonSource] = []
|
||||
for ref in source_node.refs:
|
||||
resolved = self.resolve_ref(
|
||||
source_node,
|
||||
ref.name,
|
||||
ref.package,
|
||||
ref.version,
|
||||
current_project,
|
||||
source_node.package_name,
|
||||
)
|
||||
resolved_refs.append(resolved)
|
||||
|
||||
return resolved_refs
|
||||
|
||||
# Called by dbt.parser.manifest._process_refs & ManifestLoader.check_for_model_deprecations
|
||||
def resolve_ref(
|
||||
self,
|
||||
@@ -1295,7 +1351,12 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
|
||||
# it's possible that the node is disabled
|
||||
if disabled is None:
|
||||
disabled = self.disabled_lookup.find(target_model_name, pkg, target_model_version)
|
||||
disabled = self.disabled_lookup.find(
|
||||
target_model_name,
|
||||
pkg,
|
||||
version=target_model_version,
|
||||
resource_types=REFABLE_NODE_TYPES,
|
||||
)
|
||||
|
||||
if disabled:
|
||||
return Disabled(disabled[0])
|
||||
@@ -1444,8 +1505,10 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
return is_private_ref and (
|
||||
not hasattr(node, "group")
|
||||
or not node.group
|
||||
# Invalid reference because group does not match
|
||||
or node.group != target_model.group
|
||||
or restrict_package_access
|
||||
# Or, invalid because these are different namespaces (project/package) and restrict-access is enforced
|
||||
or (node.package_name != target_model.package_name and restrict_package_access)
|
||||
)
|
||||
|
||||
def is_invalid_protected_ref(
|
||||
@@ -1550,12 +1613,14 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
if isinstance(node, GenericTestNode):
|
||||
assert test_from
|
||||
source_file.add_test(node.unique_id, test_from)
|
||||
if isinstance(node, Metric):
|
||||
elif isinstance(node, Metric):
|
||||
source_file.metrics.append(node.unique_id)
|
||||
if isinstance(node, Exposure):
|
||||
elif isinstance(node, Exposure):
|
||||
source_file.exposures.append(node.unique_id)
|
||||
if isinstance(node, Group):
|
||||
elif isinstance(node, Group):
|
||||
source_file.groups.append(node.unique_id)
|
||||
elif isinstance(node, SnapshotNode):
|
||||
source_file.snapshots.append(node.unique_id)
|
||||
elif isinstance(source_file, FixtureSourceFile):
|
||||
pass
|
||||
else:
|
||||
@@ -1566,13 +1631,15 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
self.exposures[exposure.unique_id] = exposure
|
||||
source_file.exposures.append(exposure.unique_id)
|
||||
|
||||
def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False):
|
||||
def add_metric(
|
||||
self, source_file: SchemaSourceFile, metric: Metric, generated_from: Optional[str] = None
|
||||
):
|
||||
_check_duplicates(metric, self.metrics)
|
||||
self.metrics[metric.unique_id] = metric
|
||||
if not generated:
|
||||
if not generated_from:
|
||||
source_file.metrics.append(metric.unique_id)
|
||||
else:
|
||||
source_file.generated_metrics.append(metric.unique_id)
|
||||
source_file.add_metrics_from_measures(generated_from, metric.unique_id)
|
||||
|
||||
def add_group(self, source_file: SchemaSourceFile, group: Group):
|
||||
_check_duplicates(group, self.groups)
|
||||
@@ -1586,7 +1653,7 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
else:
|
||||
self.disabled[node.unique_id] = [node]
|
||||
|
||||
def add_disabled(self, source_file: AnySourceFile, node: ResultNode, test_from=None):
|
||||
def add_disabled(self, source_file: AnySourceFile, node: GraphMemberNode, test_from=None):
|
||||
self.add_disabled_nofile(node)
|
||||
if isinstance(source_file, SchemaSourceFile):
|
||||
if isinstance(node, GenericTestNode):
|
||||
@@ -1600,6 +1667,8 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
source_file.semantic_models.append(node.unique_id)
|
||||
if isinstance(node, Exposure):
|
||||
source_file.exposures.append(node.unique_id)
|
||||
if isinstance(node, UnitTestDefinition):
|
||||
source_file.unit_tests.append(node.unique_id)
|
||||
elif isinstance(source_file, FixtureSourceFile):
|
||||
pass
|
||||
else:
|
||||
@@ -1634,6 +1703,22 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
|
||||
# end of methods formerly in ParseResult
|
||||
|
||||
def find_node_from_ref_or_source(
|
||||
self, expression: str
|
||||
) -> Optional[Union[ModelNode, SourceDefinition]]:
|
||||
ref_or_source = statically_parse_ref_or_source(expression)
|
||||
|
||||
node = None
|
||||
if isinstance(ref_or_source, RefArgs):
|
||||
node = self.ref_lookup.find(
|
||||
ref_or_source.name, ref_or_source.package, ref_or_source.version, self
|
||||
)
|
||||
else:
|
||||
source_name, source_table_name = ref_or_source[0], ref_or_source[1]
|
||||
node = self.source_lookup.find(f"{source_name}.{source_table_name}", None, self)
|
||||
|
||||
return node
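# Illustrative usage (not part of the diff), assuming the strings are statically
# parseable ref()/source() expressions and the names are hypothetical:
#   manifest.find_node_from_ref_or_source('ref("fct_orders")')          # ModelNode or None
#   manifest.find_node_from_ref_or_source('source("raw", "customers")') # SourceDefinition or None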
|
||||
|
||||
# Provide support for copy.deepcopy() - we just need to avoid the lock!
|
||||
# pickle and deepcopy use this. It returns a callable object used to
|
||||
# create the initial version of the object and a tuple of arguments
|
||||
@@ -1668,18 +1753,37 @@ class Manifest(MacroMethods, dbtClassMixin):
|
||||
self._semantic_model_by_measure_lookup,
|
||||
self._disabled_lookup,
|
||||
self._analysis_lookup,
|
||||
self._singular_test_lookup,
|
||||
)
|
||||
return self.__class__, args
|
||||
|
||||
def _microbatch_macro_is_core(self, project_name: str) -> bool:
|
||||
microbatch_is_core = False
|
||||
candidate = self.find_macro_candidate_by_name(
|
||||
name="get_incremental_microbatch_sql", root_project_name=project_name, package=None
|
||||
)
|
||||
|
||||
# We want to check for "Core", because "Core" basically means "builtin"
|
||||
if candidate is not None and candidate.locality == Locality.Core:
|
||||
microbatch_is_core = True
|
||||
|
||||
return microbatch_is_core
|
||||
|
||||
def use_microbatch_batches(self, project_name: str) -> bool:
|
||||
return (
|
||||
get_flags().require_batched_execution_for_custom_microbatch_strategy
|
||||
or self._microbatch_macro_is_core(project_name=project_name)
|
||||
)
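# Illustrative note (not part of the diff): batched execution is used when the behavior
# flag requires it, or when get_incremental_microbatch_sql resolves to the builtin
# ("Core") macro rather than a user/package override, e.g. with a hypothetical project:
#   manifest.use_microbatch_batches(project_name="my_project")  # -> bool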
|
||||
|
||||
|
||||
class MacroManifest(MacroMethods):
|
||||
def __init__(self, macros) -> None:
|
||||
self.macros = macros
|
||||
self.metadata = ManifestMetadata(
|
||||
user_id=tracking.active_user.id if tracking.active_user else None,
|
||||
send_anonymous_usage_stats=get_flags().SEND_ANONYMOUS_USAGE_STATS
|
||||
if tracking.active_user
|
||||
else None,
|
||||
send_anonymous_usage_stats=(
|
||||
get_flags().SEND_ANONYMOUS_USAGE_STATS if tracking.active_user else None
|
||||
),
|
||||
)
|
||||
# This is returned by the 'graph' context property
|
||||
# in the ProviderContext class.
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
from typing import Any, Dict, Iterator, List
|
||||
|
||||
from dbt_semantic_interfaces.type_enums import MetricType
|
||||
|
||||
from dbt.contracts.graph.manifest import Manifest, Metric
|
||||
from dbt_semantic_interfaces.type_enums import MetricType
|
||||
|
||||
DERIVED_METRICS = [MetricType.DERIVED, MetricType.RATIO]
|
||||
BASE_METRICS = [MetricType.SIMPLE, MetricType.CUMULATIVE, MetricType.CONVERSION]
|
||||
|
||||
@@ -39,12 +39,6 @@ class UnitTestNodeConfig(NodeConfig):
|
||||
expected_sql: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class EmptySnapshotConfig(NodeConfig):
|
||||
materialized: str = "snapshot"
|
||||
unique_key: Optional[str] = None # override NodeConfig unique_key definition
|
||||
|
||||
|
||||
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
|
||||
NodeType.Metric: MetricConfig,
|
||||
NodeType.SemanticModel: SemanticModelConfig,
|
||||
@@ -62,7 +56,6 @@ RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
|
||||
# base resource types are like resource types, except nothing has mandatory
|
||||
# configs.
|
||||
BASE_RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = RESOURCE_TYPES.copy()
|
||||
BASE_RESOURCE_TYPES.update({NodeType.Snapshot: EmptySnapshotConfig})
|
||||
|
||||
|
||||
def get_config_for(resource_type: NodeType, base=False) -> Type[BaseConfig]:
|
||||
|
||||
@@ -2,6 +2,7 @@ import hashlib
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
@@ -18,7 +19,8 @@ from typing import (
|
||||
|
||||
from mashumaro.types import SerializableType
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt.adapters.base import ConstraintSupport
|
||||
from dbt.adapters.factory import get_adapter_constraint_support
|
||||
from dbt.artifacts.resources import Analysis as AnalysisResource
|
||||
from dbt.artifacts.resources import (
|
||||
BaseResource,
|
||||
@@ -56,8 +58,11 @@ from dbt.artifacts.resources import SingularTest as SingularTestResource
|
||||
from dbt.artifacts.resources import Snapshot as SnapshotResource
|
||||
from dbt.artifacts.resources import SourceDefinition as SourceDefinitionResource
|
||||
from dbt.artifacts.resources import SqlOperation as SqlOperationResource
|
||||
from dbt.artifacts.resources import TimeSpine
|
||||
from dbt.artifacts.resources import UnitTestDefinition as UnitTestDefinitionResource
|
||||
from dbt.contracts.graph.model_config import EmptySnapshotConfig, UnitTestNodeConfig
|
||||
from dbt.artifacts.schemas.batch_results import BatchResults
|
||||
from dbt.clients.jinja_static import statically_extract_has_name_this
|
||||
from dbt.contracts.graph.model_config import UnitTestNodeConfig
|
||||
from dbt.contracts.graph.node_args import ModelNodeArgs
|
||||
from dbt.contracts.graph.unparsed import (
|
||||
HasYamlMetadata,
|
||||
@@ -83,7 +88,12 @@ from dbt.node_types import (
|
||||
NodeType,
|
||||
)
|
||||
from dbt_common.clients.system import write_file
|
||||
from dbt_common.contracts.constraints import ConstraintType
|
||||
from dbt_common.contracts.constraints import (
|
||||
ColumnLevelConstraint,
|
||||
ConstraintType,
|
||||
ModelLevelConstraint,
|
||||
)
|
||||
from dbt_common.dataclass_schema import dbtClassMixin
|
||||
from dbt_common.events.contextvars import set_log_contextvars
|
||||
from dbt_common.events.functions import warn_or_error
|
||||
|
||||
@@ -237,7 +247,9 @@ class NodeInfoMixin:
|
||||
|
||||
@dataclass
|
||||
class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType):
|
||||
def get_target_write_path(self, target_path: str, subdirectory: str):
|
||||
def get_target_write_path(
|
||||
self, target_path: str, subdirectory: str, split_suffix: Optional[str] = None
|
||||
):
|
||||
# This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory
|
||||
if os.path.basename(self.path) == os.path.basename(self.original_file_path):
|
||||
# One-to-one relationship of nodes to files.
|
||||
@@ -245,6 +257,15 @@ class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, Serializabl
|
||||
else:
|
||||
# Many-to-one relationship of nodes to files.
|
||||
path = os.path.join(self.original_file_path, self.path)
|
||||
|
||||
if split_suffix:
|
||||
pathlib_path = Path(path)
|
||||
path = str(
|
||||
pathlib_path.parent
|
||||
/ pathlib_path.stem
|
||||
/ (pathlib_path.stem + f"_{split_suffix}" + pathlib_path.suffix)
|
||||
)
|
||||
|
||||
target_write_path = os.path.join(target_path, subdirectory, self.package_name, path)
|
||||
return target_write_path
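# Illustrative sketch (not part of the diff): reproducing the many-to-one + split_suffix
# path arithmetic above with hypothetical file names and package.
import os
from pathlib import Path

original_file_path, node_path, split_suffix = "models/staging.sql", "stg_orders.sql", "20240101"
p = Path(os.path.join(original_file_path, node_path))
p = p.parent / p.stem / (p.stem + f"_{split_suffix}" + p.suffix)
print(os.path.join("target", "compiled", "my_package", str(p)))
# -> target/compiled/my_package/models/staging.sql/stg_orders/stg_orders_20240101.sql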
|
||||
|
||||
@@ -422,8 +443,40 @@ class HookNode(HookNodeResource, CompiledNode):
|
||||
return HookNodeResource
|
||||
|
||||
|
||||
@dataclass
|
||||
class BatchContext(dbtClassMixin):
|
||||
id: str
|
||||
event_time_start: datetime
|
||||
event_time_end: datetime
|
||||
|
||||
def __post_serialize__(self, data, context):
|
||||
# This is insane, but necessary, I apologize. Mashumaro handles the
|
||||
# dictification of this class via a compile time generated `to_dict`
|
||||
# method based off of the _typing_ of the class. By default `datetime`
|
||||
# types are converted to strings. We don't want that, we want them to
|
||||
# stay datetimes.
|
||||
# Note: This is safe because the `BatchContext` isn't part of the artifact
|
||||
# and thus doesn't get written out.
|
||||
new_data = super().__post_serialize__(data, context)
|
||||
new_data["event_time_start"] = self.event_time_start
|
||||
new_data["event_time_end"] = self.event_time_end
|
||||
return new_data
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelNode(ModelResource, CompiledNode):
|
||||
previous_batch_results: Optional[BatchResults] = None
|
||||
batch: Optional[BatchContext] = None
|
||||
_has_this: Optional[bool] = None
|
||||
|
||||
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
|
||||
dct = super().__post_serialize__(dct, context)
|
||||
if "_has_this" in dct:
|
||||
del dct["_has_this"]
|
||||
if "previous_batch_results" in dct:
|
||||
del dct["previous_batch_results"]
|
||||
return dct
|
||||
|
||||
@classmethod
|
||||
def resource_class(cls) -> Type[ModelResource]:
|
||||
return ModelResource
|
||||
@@ -469,6 +522,13 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
def is_latest_version(self) -> bool:
|
||||
return self.version is not None and self.version == self.latest_version
|
||||
|
||||
@property
|
||||
def is_past_deprecation_date(self) -> bool:
|
||||
return (
|
||||
self.deprecation_date is not None
|
||||
and self.deprecation_date < datetime.now().astimezone()
|
||||
)
|
||||
|
||||
@property
|
||||
def search_name(self):
|
||||
if self.version is None:
|
||||
@@ -480,6 +540,24 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
def materialization_enforces_constraints(self) -> bool:
|
||||
return self.config.materialized in ["table", "incremental"]
|
||||
|
||||
@property
|
||||
def all_constraints(self) -> List[Union[ModelLevelConstraint, ColumnLevelConstraint]]:
|
||||
constraints: List[Union[ModelLevelConstraint, ColumnLevelConstraint]] = []
|
||||
for model_level_constraint in self.constraints:
|
||||
constraints.append(model_level_constraint)
|
||||
|
||||
for column in self.columns.values():
|
||||
for column_level_constraint in column.constraints:
|
||||
constraints.append(column_level_constraint)
|
||||
|
||||
return constraints
|
||||
|
||||
@property
|
||||
def has_this(self) -> bool:
|
||||
if self._has_this is None:
|
||||
self._has_this = statically_extract_has_name_this(self.raw_code)
|
||||
return self._has_this
|
||||
|
||||
def infer_primary_key(self, data_tests: List["GenericTestNode"]) -> List[str]:
|
||||
"""
|
||||
Infers the columns that can be used as primary key of a model in the following order:
|
||||
@@ -501,11 +579,20 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
columns_with_disabled_unique_tests = set()
|
||||
columns_with_not_null_tests = set()
|
||||
for test in data_tests:
|
||||
columns = []
|
||||
if "column_name" in test.test_metadata.kwargs:
|
||||
columns: List[str] = []
|
||||
# extract columns from test kwargs, ensuring columns is a List[str], given tests can have custom (user- or package-defined) kwarg types
|
||||
if "column_name" in test.test_metadata.kwargs and isinstance(
|
||||
test.test_metadata.kwargs["column_name"], str
|
||||
):
|
||||
columns = [test.test_metadata.kwargs["column_name"]]
|
||||
elif "combination_of_columns" in test.test_metadata.kwargs:
|
||||
columns = test.test_metadata.kwargs["combination_of_columns"]
|
||||
elif "combination_of_columns" in test.test_metadata.kwargs and isinstance(
|
||||
test.test_metadata.kwargs["combination_of_columns"], list
|
||||
):
|
||||
columns = [
|
||||
column
|
||||
for column in test.test_metadata.kwargs["combination_of_columns"]
|
||||
if isinstance(column, str)
|
||||
]
|
||||
|
||||
for column in columns:
|
||||
if test.test_metadata.name in ["unique", "unique_combination_of_columns"]:
|
||||
@@ -570,6 +657,42 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
data = contract_state.encode("utf-8")
|
||||
self.contract.checksum = hashlib.new("sha256", data).hexdigest()
|
||||
|
||||
def same_contract_removed(self) -> bool:
|
||||
"""
|
||||
self: the removed (deleted, renamed, or disabled) model node
|
||||
"""
|
||||
# If the contract wasn't previously enforced, no contract change has occurred
|
||||
if self.contract.enforced is False:
|
||||
return True
|
||||
|
||||
# Removed node is past its deprecation_date, so deletion does not constitute a contract change
|
||||
if self.is_past_deprecation_date:
|
||||
return True
|
||||
|
||||
# Disabled, deleted, or renamed node with previously enforced contract.
|
||||
if not self.config.enabled:
|
||||
breaking_change = f"Contracted model '{self.unique_id}' was disabled."
|
||||
else:
|
||||
breaking_change = f"Contracted model '{self.unique_id}' was deleted or renamed."
|
||||
|
||||
if self.version is None:
|
||||
warn_or_error(
|
||||
UnversionedBreakingChange(
|
||||
breaking_changes=[breaking_change],
|
||||
model_name=self.name,
|
||||
model_file_path=self.original_file_path,
|
||||
),
|
||||
node=self,
|
||||
)
|
||||
return False
|
||||
else:
|
||||
raise (
|
||||
ContractBreakingChangeError(
|
||||
breaking_changes=[breaking_change],
|
||||
node=self,
|
||||
)
|
||||
)
|
||||
|
||||
def same_contract(self, old, adapter_type=None) -> bool:
|
||||
# If the contract wasn't previously enforced:
|
||||
if old.contract.enforced is False and self.contract.enforced is False:
|
||||
@@ -591,9 +714,9 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
contract_enforced_disabled: bool = False
|
||||
columns_removed: List[str] = []
|
||||
column_type_changes: List[Dict[str, str]] = []
|
||||
enforced_column_constraint_removed: List[
|
||||
Dict[str, str]
|
||||
] = [] # column_name, constraint_type
|
||||
enforced_column_constraint_removed: List[Dict[str, str]] = (
|
||||
[]
|
||||
) # column_name, constraint_type
|
||||
enforced_model_constraint_removed: List[Dict[str, Any]] = [] # constraint_type, columns
|
||||
materialization_changed: List[str] = []
|
||||
|
||||
@@ -601,10 +724,6 @@ class ModelNode(ModelResource, CompiledNode):
|
||||
# Breaking change: the contract was previously enforced, and it no longer is
|
||||
contract_enforced_disabled = True
|
||||
|
||||
# TODO: this avoids the circular imports but isn't ideal
|
||||
from dbt.adapters.base import ConstraintSupport
|
||||
from dbt.adapters.factory import get_adapter_constraint_support
|
||||
|
||||
constraint_support = get_adapter_constraint_support(adapter_type)
|
||||
column_constraints_exist = False
|
||||
|
||||
@@ -1000,19 +1119,6 @@ class UnitTestFileFixture(BaseNode):
|
||||
# ====================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntermediateSnapshotNode(CompiledNode):
|
||||
# at an intermediate stage in parsing, where we've built something better
|
||||
# than an unparsed node for rendering in parse mode, it's pretty possible
|
||||
# that we won't have critical snapshot-related information that is only
|
||||
# defined in config blocks. To fix that, we have an intermediate type that
|
||||
# uses a regular node config, which the snapshot parser will then convert
|
||||
# into a full ParsedSnapshotNode after rendering. Note: it currently does
|
||||
# not work to set snapshot config in schema files because of the validation.
|
||||
resource_type: Literal[NodeType.Snapshot]
|
||||
config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SnapshotNode(SnapshotResource, CompiledNode):
|
||||
@classmethod
|
||||
@@ -1092,7 +1198,7 @@ class UnpatchedSourceDefinition(BaseNode):
|
||||
def get_source_representation(self):
|
||||
return f'source("{self.source.name}", "{self.table.name}")'
|
||||
|
||||
def validate_data_tests(self):
|
||||
def validate_data_tests(self, is_root_project: bool):
|
||||
"""
|
||||
sources parse tests differently than models, so we need to do some validation
|
||||
here where it's done in the PatchParser for other nodes
|
||||
@@ -1103,11 +1209,6 @@ class UnpatchedSourceDefinition(BaseNode):
|
||||
"Invalid test config: cannot have both 'tests' and 'data_tests' defined"
|
||||
)
|
||||
if self.tests:
|
||||
deprecations.warn(
|
||||
"project-test-config",
|
||||
deprecated_path="tests",
|
||||
exp_path="data_tests",
|
||||
)
|
||||
self.data_tests.extend(self.tests)
|
||||
self.tests.clear()
|
||||
|
||||
@@ -1118,11 +1219,6 @@ class UnpatchedSourceDefinition(BaseNode):
|
||||
"Invalid test config: cannot have both 'tests' and 'data_tests' defined"
|
||||
)
|
||||
if column.tests:
|
||||
deprecations.warn(
|
||||
"project-test-config",
|
||||
deprecated_path="tests",
|
||||
exp_path="data_tests",
|
||||
)
|
||||
column.data_tests.extend(column.tests)
|
||||
column.tests.clear()
|
||||
|
||||
@@ -1140,7 +1236,6 @@ class UnpatchedSourceDefinition(BaseNode):
|
||||
return [] if self.table.columns is None else self.table.columns
|
||||
|
||||
def get_tests(self) -> Iterator[Tuple[Dict[str, Any], Optional[UnparsedColumn]]]:
|
||||
self.validate_data_tests()
|
||||
for data_test in self.data_tests:
|
||||
yield normalize_test(data_test), None
|
||||
|
||||
@@ -1177,12 +1272,16 @@ class SourceDefinition(
|
||||
return SourceDefinitionResource
|
||||
|
||||
def same_database_representation(self, other: "SourceDefinition") -> bool:
|
||||
return (
|
||||
self.database == other.database
|
||||
and self.schema == other.schema
|
||||
and self.identifier == other.identifier
|
||||
and True
|
||||
)
|
||||
|
||||
# preserve legacy behaviour -- use potentially rendered database
|
||||
if get_flags().state_modified_compare_more_unrendered_values is False:
|
||||
same_database = self.database == other.database
|
||||
same_schema = self.schema == other.schema
|
||||
else:
|
||||
same_database = self.unrendered_database == other.unrendered_database
|
||||
same_schema = self.unrendered_schema == other.unrendered_schema
|
||||
|
||||
return same_database and same_schema and self.identifier == other.identifier and True
|
||||
|
||||
def same_quoting(self, other: "SourceDefinition") -> bool:
|
||||
return self.quoting == other.quoting
|
||||
@@ -1421,6 +1520,13 @@ class Group(GroupResource, BaseNode):
|
||||
def resource_class(cls) -> Type[GroupResource]:
|
||||
return GroupResource
|
||||
|
||||
def to_logging_dict(self) -> Dict[str, Union[str, Dict[str, str]]]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"package_name": self.package_name,
|
||||
"owner": self.owner.to_dict(omit_none=True),
|
||||
}
|
||||
|
||||
|
||||
# ====================================
|
||||
# SemanticModel node
|
||||
@@ -1521,19 +1627,17 @@ class SavedQuery(NodeInfoMixin, GraphNode, SavedQueryResource):
|
||||
return self.group == old.group
|
||||
|
||||
def same_exports(self, old: "SavedQuery") -> bool:
|
||||
# TODO: This isn't currently used in `same_contents` (nor called anywhere else)
|
||||
if len(self.exports) != len(old.exports):
|
||||
return False
|
||||
|
||||
# exports should be in the same order, so we zip them for easy iteration
|
||||
for (old_export, new_export) in zip(old.exports, self.exports):
|
||||
if not (
|
||||
old_export.name == new_export.name
|
||||
and old_export.config.export_as == new_export.config.export_as
|
||||
and old_export.config.schema_name == new_export.config.schema_name
|
||||
and old_export.config.alias == new_export.config.alias
|
||||
):
|
||||
for old_export, new_export in zip(old.exports, self.exports):
|
||||
if not (old_export.name == new_export.name):
|
||||
return False
|
||||
keys = ["export_as", "schema", "alias"]
|
||||
for key in keys:
|
||||
if old_export.unrendered_config.get(key) != new_export.unrendered_config.get(key):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@@ -1551,6 +1655,7 @@ class SavedQuery(NodeInfoMixin, GraphNode, SavedQueryResource):
|
||||
and self.same_label(old)
|
||||
and self.same_config(old)
|
||||
and self.same_group(old)
|
||||
and self.same_exports(old)
|
||||
and True
|
||||
)
|
||||
|
||||
@@ -1580,6 +1685,7 @@ class ParsedNodePatch(ParsedPatch):
|
||||
latest_version: Optional[NodeVersion]
|
||||
constraints: List[Dict[str, Any]]
|
||||
deprecation_date: Optional[datetime]
|
||||
time_spine: Optional[TimeSpine] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -1587,6 +1693,11 @@ class ParsedMacroPatch(ParsedPatch):
|
||||
arguments: List[MacroArgument] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedSingularTestPatch(ParsedPatch):
|
||||
pass
|
||||
|
||||
|
||||
# ====================================
|
||||
# Node unions/categories
|
||||
# ====================================
|
||||
@@ -1614,6 +1725,7 @@ ManifestNode = Union[
|
||||
ResultNode = Union[
|
||||
ManifestNode,
|
||||
SourceDefinition,
|
||||
HookNode,
|
||||
]
|
||||
|
||||
# All nodes that can be in the DAG
|
||||
@@ -1636,6 +1748,7 @@ Resource = Union[
|
||||
|
||||
TestNode = Union[SingularTestNode, GenericTestNode]
|
||||
|
||||
SemanticManifestNode = Union[SavedQuery, SemanticModel, Metric]
|
||||
|
||||
RESOURCE_CLASS_TO_NODE_CLASS: Dict[Type[BaseResource], Type[BaseNode]] = {
|
||||
node_class.resource_class(): node_class
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
from typing import List, Optional, Set
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt.constants import (
|
||||
LEGACY_TIME_SPINE_GRANULARITY,
|
||||
LEGACY_TIME_SPINE_MODEL_NAME,
|
||||
MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY,
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.graph.nodes import ModelNode
|
||||
from dbt.events.types import ArtifactWritten, SemanticValidationFailure
|
||||
from dbt.exceptions import ParsingError
|
||||
from dbt.flags import get_flags
|
||||
from dbt_common.clients.system import write_file
|
||||
from dbt_common.events.base_types import EventLevel
|
||||
from dbt_common.events.functions import fire_event
|
||||
from dbt_semantic_interfaces.implementations.metric import PydanticMetric
|
||||
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
|
||||
from dbt_semantic_interfaces.implementations.project_configuration import (
|
||||
PydanticProjectConfiguration,
|
||||
)
|
||||
@@ -7,23 +24,27 @@ from dbt_semantic_interfaces.implementations.semantic_manifest import (
|
||||
PydanticSemanticManifest,
|
||||
)
|
||||
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel
|
||||
from dbt_semantic_interfaces.implementations.time_spine import (
|
||||
PydanticTimeSpine,
|
||||
PydanticTimeSpineCustomGranularityColumn,
|
||||
PydanticTimeSpinePrimaryColumn,
|
||||
)
|
||||
from dbt_semantic_interfaces.implementations.time_spine_table_configuration import (
|
||||
PydanticTimeSpineTableConfiguration,
|
||||
PydanticTimeSpineTableConfiguration as LegacyTimeSpine,
|
||||
)
|
||||
from dbt_semantic_interfaces.type_enums import TimeGranularity
|
||||
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
|
||||
SemanticManifestValidator,
|
||||
)
|
||||
|
||||
from dbt.events.types import SemanticValidationFailure
|
||||
from dbt.exceptions import ParsingError
|
||||
from dbt_common.clients.system import write_file
|
||||
from dbt_common.events.base_types import EventLevel
|
||||
from dbt_common.events.functions import fire_event
|
||||
from dbt_semantic_interfaces.validations.validator_helpers import (
|
||||
FileContext,
|
||||
ValidationError,
|
||||
ValidationIssueContext,
|
||||
)
|
||||
|
||||
|
||||
class SemanticManifest:
|
||||
def __init__(self, manifest) -> None:
|
||||
def __init__(self, manifest: Manifest) -> None:
|
||||
self.manifest = manifest
|
||||
|
||||
def validate(self) -> bool:
|
||||
@@ -44,23 +65,116 @@ class SemanticManifest:
|
||||
semantic_manifest = self._get_pydantic_semantic_manifest()
|
||||
validator = SemanticManifestValidator[PydanticSemanticManifest]()
|
||||
validation_results = validator.validate_semantic_manifest(semantic_manifest)
|
||||
validation_result_errors = list(validation_results.errors)
|
||||
|
||||
metrics_using_old_params: Set[str] = set()
|
||||
for metric in semantic_manifest.metrics or []:
|
||||
for field in ("window", "grain_to_date"):
|
||||
type_params_field_value = getattr(metric.type_params, field)
|
||||
# Warn that the old type_params structure has been deprecated.
|
||||
if type_params_field_value:
|
||||
metrics_using_old_params.add(metric.name)
|
||||
if metrics_using_old_params:
|
||||
if get_flags().require_nested_cumulative_type_params is False:
|
||||
deprecations.warn(
|
||||
"mf-cumulative-type-params-deprecation",
|
||||
)
|
||||
else:
|
||||
names = ", ".join(metrics_using_old_params)
|
||||
validation_result_errors.append(
|
||||
ValidationError(
|
||||
context=ValidationIssueContext(
|
||||
# We don't have the file context at this point.
|
||||
file_context=FileContext(),
|
||||
object_name=names,
|
||||
object_type="metric",
|
||||
),
|
||||
message=f"Cumulative fields `type_params.window` and `type_params.grain_to_date` should be nested under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. Invalid metrics: {names}. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes.",
|
||||
)
|
||||
)
|
||||
|
||||
time_spines = semantic_manifest.project_configuration.time_spines
|
||||
legacy_time_spines = (
|
||||
semantic_manifest.project_configuration.time_spine_table_configurations
|
||||
)
|
||||
# If the time spine contains a day grain then it is functionally equivalent to the legacy time spine.
|
||||
time_spines_contain_day = any(
|
||||
c for c in time_spines if c.primary_column.time_granularity == TimeGranularity.DAY
|
||||
)
|
||||
if (
|
||||
get_flags().require_yaml_configuration_for_mf_time_spines is False
|
||||
and legacy_time_spines
|
||||
and not time_spines_contain_day
|
||||
):
|
||||
deprecations.warn(
|
||||
"mf-timespine-without-yaml-configuration",
|
||||
)
|
||||
|
||||
for warning in validation_results.warnings:
|
||||
fire_event(SemanticValidationFailure(msg=warning.message))
|
||||
|
||||
for error in validation_results.errors:
|
||||
for error in validation_result_errors:
|
||||
fire_event(SemanticValidationFailure(msg=error.message), EventLevel.ERROR)
|
||||
|
||||
return not validation_results.errors
|
||||
return not validation_result_errors
|
||||
|
||||
def write_json_to_file(self, file_path: str):
|
||||
semantic_manifest = self._get_pydantic_semantic_manifest()
|
||||
json = semantic_manifest.json()
|
||||
write_file(file_path, json)
|
||||
fire_event(ArtifactWritten(artifact_type=self.__class__.__name__, artifact_path=file_path))
|
||||
|
||||
def _get_pydantic_semantic_manifest(self) -> PydanticSemanticManifest:
|
||||
pydantic_time_spines: List[PydanticTimeSpine] = []
|
||||
minimum_time_spine_granularity: Optional[TimeGranularity] = None
|
||||
for node in self.manifest.nodes.values():
|
||||
if not (isinstance(node, ModelNode) and node.time_spine):
|
||||
continue
|
||||
time_spine = node.time_spine
|
||||
standard_granularity_column = None
|
||||
for column in node.columns.values():
|
||||
if column.name == time_spine.standard_granularity_column:
|
||||
standard_granularity_column = column
|
||||
break
|
||||
# Assertions needed for type checking
|
||||
if not standard_granularity_column:
|
||||
raise ParsingError(
|
||||
"Expected to find time spine standard granularity column in model columns, but did not. "
|
||||
"This should have been caught in YAML parsing."
|
||||
)
|
||||
if not standard_granularity_column.granularity:
|
||||
raise ParsingError(
|
||||
"Expected to find granularity set for time spine standard granularity column, but did not. "
|
||||
"This should have been caught in YAML parsing."
|
||||
)
|
||||
pydantic_time_spine = PydanticTimeSpine(
|
||||
node_relation=PydanticNodeRelation(
|
||||
alias=node.alias,
|
||||
schema_name=node.schema,
|
||||
database=node.database,
|
||||
relation_name=node.relation_name,
|
||||
),
|
||||
primary_column=PydanticTimeSpinePrimaryColumn(
|
||||
name=time_spine.standard_granularity_column,
|
||||
time_granularity=standard_granularity_column.granularity,
|
||||
),
|
||||
custom_granularities=[
|
||||
PydanticTimeSpineCustomGranularityColumn(
|
||||
name=custom_granularity.name, column_name=custom_granularity.column_name
|
||||
)
|
||||
for custom_granularity in time_spine.custom_granularities
|
||||
],
|
||||
)
|
||||
pydantic_time_spines.append(pydantic_time_spine)
|
||||
if (
|
||||
not minimum_time_spine_granularity
|
||||
or standard_granularity_column.granularity.to_int()
|
||||
< minimum_time_spine_granularity.to_int()
|
||||
):
|
||||
minimum_time_spine_granularity = standard_granularity_column.granularity
|
||||
|
||||
project_config = PydanticProjectConfiguration(
|
||||
time_spine_table_configurations=[],
|
||||
time_spine_table_configurations=[], time_spines=pydantic_time_spines
|
||||
)
|
||||
pydantic_semantic_manifest = PydanticSemanticManifest(
|
||||
metrics=[], semantic_models=[], project_configuration=project_config
|
||||
@@ -79,25 +193,39 @@ class SemanticManifest:
|
||||
PydanticSavedQuery.parse_obj(saved_query.to_dict())
|
||||
)
|
||||
|
||||
# Look for time-spine table model and create time spine table configuration
|
||||
if self.manifest.semantic_models:
|
||||
# Get model for time_spine_table
|
||||
time_spine_model_name = "metricflow_time_spine"
|
||||
model = self.manifest.ref_lookup.find(time_spine_model_name, None, None, self.manifest)
|
||||
if not model:
|
||||
raise ParsingError(
|
||||
"The semantic layer requires a 'metricflow_time_spine' model in the project, but none was found. "
|
||||
"Guidance on creating this model can be found on our docs site ("
|
||||
"https://docs.getdbt.com/docs/build/metricflow-time-spine) "
|
||||
)
|
||||
# Create time_spine_table_config, set it in project_config, and add to semantic manifest
|
||||
time_spine_table_config = PydanticTimeSpineTableConfiguration(
|
||||
location=model.relation_name,
|
||||
column_name="date_day",
|
||||
grain=TimeGranularity.DAY,
|
||||
legacy_time_spine_model = self.manifest.ref_lookup.find(
|
||||
LEGACY_TIME_SPINE_MODEL_NAME, None, None, self.manifest
|
||||
)
|
||||
pydantic_semantic_manifest.project_configuration.time_spine_table_configurations = [
|
||||
time_spine_table_config
|
||||
]
|
||||
if legacy_time_spine_model:
|
||||
if (
|
||||
not minimum_time_spine_granularity
|
||||
or LEGACY_TIME_SPINE_GRANULARITY.to_int()
|
||||
< minimum_time_spine_granularity.to_int()
|
||||
):
|
||||
minimum_time_spine_granularity = LEGACY_TIME_SPINE_GRANULARITY
|
||||
|
||||
# If no time spines have been configured at DAY or smaller AND legacy time spine model does not exist, error.
|
||||
if (
|
||||
not minimum_time_spine_granularity
|
||||
or minimum_time_spine_granularity.to_int()
|
||||
> MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY.to_int()
|
||||
):
|
||||
raise ParsingError(
|
||||
"The semantic layer requires a time spine model with granularity DAY or smaller in the project, "
|
||||
"but none was found. Guidance on creating this model can be found on our docs site "
|
||||
"(https://docs.getdbt.com/docs/build/metricflow-time-spine)."
|
||||
)
|
||||
|
||||
# For backward compatibility: if legacy time spine exists, include it in the manifest.
|
||||
if legacy_time_spine_model:
|
||||
legacy_time_spine = LegacyTimeSpine(
|
||||
location=legacy_time_spine_model.relation_name,
|
||||
column_name="date_day",
|
||||
grain=LEGACY_TIME_SPINE_GRANULARITY,
|
||||
)
|
||||
pydantic_semantic_manifest.project_configuration.time_spine_table_configurations = [
|
||||
legacy_time_spine
|
||||
]
|
||||
|
||||
return pydantic_semantic_manifest
|
||||
|
||||
@@ -4,8 +4,6 @@ from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Literal, Optional, Sequence, Union
|
||||
|
||||
from dbt_semantic_interfaces.type_enums import ConversionCalculationType
|
||||
|
||||
# trigger the PathEncoder
|
||||
import dbt_common.helper_types # noqa:F401
|
||||
from dbt import deprecations
|
||||
@@ -23,6 +21,7 @@ from dbt.artifacts.resources import (
|
||||
NodeVersion,
|
||||
Owner,
|
||||
Quoting,
|
||||
TimeSpine,
|
||||
UnitTestInputFixture,
|
||||
UnitTestNodeVersions,
|
||||
UnitTestOutputFixture,
|
||||
@@ -39,6 +38,10 @@ from dbt_common.dataclass_schema import (
|
||||
dbtClassMixin,
|
||||
)
|
||||
from dbt_common.exceptions import DbtInternalError
|
||||
from dbt_semantic_interfaces.type_enums import (
|
||||
ConversionCalculationType,
|
||||
PeriodAggregation,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -114,6 +117,7 @@ class HasColumnAndTestProps(HasColumnProps):
|
||||
class UnparsedColumn(HasColumnAndTestProps):
|
||||
quote: Optional[bool] = None
|
||||
tags: List[str] = field(default_factory=list)
|
||||
granularity: Optional[str] = None # str is really a TimeGranularity Enum
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -198,6 +202,11 @@ class UnparsedAnalysisUpdate(HasConfig, HasColumnDocs, HasColumnProps, HasYamlMe
|
||||
access: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedSingularTestUpdate(HasConfig, HasColumnProps, HasYamlMetadata):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedNodeUpdate(HasConfig, HasColumnTests, HasColumnAndTestProps, HasYamlMetadata):
|
||||
quote_columns: Optional[bool] = None
|
||||
@@ -211,6 +220,7 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
|
||||
latest_version: Optional[NodeVersion] = None
|
||||
versions: Sequence[UnparsedVersion] = field(default_factory=list)
|
||||
deprecation_date: Optional[datetime.datetime] = None
|
||||
time_spine: Optional[TimeSpine] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.latest_version:
|
||||
@@ -232,6 +242,41 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
|
||||
|
||||
self.deprecation_date = normalize_date(self.deprecation_date)
|
||||
|
||||
if self.time_spine:
|
||||
columns = (
|
||||
self.get_columns_for_version(self.latest_version)
|
||||
if self.latest_version
|
||||
else self.columns
|
||||
)
|
||||
column_names_to_columns = {column.name: column for column in columns}
|
||||
if self.time_spine.standard_granularity_column not in column_names_to_columns:
|
||||
raise ParsingError(
|
||||
f"Time spine standard granularity column must be defined on the model. Got invalid "
|
||||
f"column name '{self.time_spine.standard_granularity_column}' for model '{self.name}'. Valid names"
|
||||
f"{' for latest version' if self.latest_version else ''}: {list(column_names_to_columns.keys())}."
|
||||
)
|
||||
standard_column = column_names_to_columns[self.time_spine.standard_granularity_column]
|
||||
if not standard_column.granularity:
|
||||
raise ParsingError(
|
||||
f"Time spine standard granularity column must have a granularity defined. Please add one for "
|
||||
f"column '{self.time_spine.standard_granularity_column}' in model '{self.name}'."
|
||||
)
|
||||
custom_granularity_columns_not_found = []
|
||||
for custom_granularity in self.time_spine.custom_granularities:
|
||||
column_name = (
|
||||
custom_granularity.column_name
|
||||
if custom_granularity.column_name
|
||||
else custom_granularity.name
|
||||
)
|
||||
if column_name not in column_names_to_columns:
|
||||
custom_granularity_columns_not_found.append(column_name)
|
||||
if custom_granularity_columns_not_found:
|
||||
raise ParsingError(
|
||||
"Time spine custom granularity columns do not exist in the model. "
|
||||
f"Columns not found: {custom_granularity_columns_not_found}; "
|
||||
f"Available columns: {list(column_names_to_columns.keys())}"
|
||||
)
|
||||
|
||||
def get_columns_for_version(self, version: NodeVersion) -> List[UnparsedColumn]:
|
||||
if version not in self._version_map:
|
||||
raise DbtInternalError(
|
||||
@@ -300,6 +345,8 @@ class UnparsedSourceDefinition(dbtClassMixin):
|
||||
tables: List[UnparsedSourceTableDefinition] = field(default_factory=list)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
config: Dict[str, Any] = field(default_factory=dict)
|
||||
unrendered_database: Optional[str] = None
|
||||
unrendered_schema: Optional[str] = None
|
||||
|
||||
@classmethod
|
||||
def validate(cls, data):
|
||||
@@ -532,6 +579,13 @@ class UnparsedConversionTypeParams(dbtClassMixin):
|
||||
constant_properties: Optional[List[ConstantPropertyInput]] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedCumulativeTypeParams(dbtClassMixin):
|
||||
window: Optional[str] = None
|
||||
grain_to_date: Optional[str] = None
|
||||
period_agg: str = PeriodAggregation.FIRST.value
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedMetricTypeParams(dbtClassMixin):
|
||||
measure: Optional[Union[UnparsedMetricInputMeasure, str]] = None
|
||||
@@ -542,6 +596,7 @@ class UnparsedMetricTypeParams(dbtClassMixin):
|
||||
grain_to_date: Optional[str] = None # str is really a TimeGranularity Enum
|
||||
metrics: Optional[List[Union[UnparsedMetricInput, str]]] = None
|
||||
conversion_type_params: Optional[UnparsedConversionTypeParams] = None
|
||||
cumulative_type_params: Optional[UnparsedCumulativeTypeParams] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -553,6 +608,7 @@ class UnparsedMetric(dbtClassMixin):
|
||||
description: str = ""
|
||||
# Note: `Union` must be the outermost part of the type annotation for serialization to work properly.
|
||||
filter: Union[str, List[str], None] = None
|
||||
time_granularity: Optional[str] = None
|
||||
# metadata: Optional[Unparsedetadata] = None # TODO
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
@@ -664,6 +720,8 @@ class UnparsedQueryParams(dbtClassMixin):
|
||||
group_by: List[str] = field(default_factory=list)
|
||||
# Note: `Union` must be the outermost part of the type annotation for serialization to work properly.
|
||||
where: Union[str, List[str], None] = None
|
||||
order_by: List[str] = field(default_factory=list)
|
||||
limit: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -5,7 +5,6 @@ from mashumaro.jsonschema.annotations import Pattern
|
||||
from mashumaro.types import SerializableType
|
||||
from typing_extensions import Annotated
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt.adapters.contracts.connection import QueryComment
|
||||
from dbt.contracts.util import Identifier, list_str
|
||||
from dbt_common.contracts.util import Mergeable
|
||||
@@ -259,6 +258,7 @@ class Project(dbtClassMixin):
|
||||
query_comment: Optional[Union[QueryComment, NoValue, str]] = field(default_factory=NoValue)
|
||||
restrict_access: bool = False
|
||||
dbt_cloud: Optional[Dict[str, Any]] = None
|
||||
flags: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
class Config(dbtMashConfig):
|
||||
# These tell mashumaro to use aliases for jsonschema and for "from_dict"
|
||||
@@ -312,10 +312,6 @@ class Project(dbtClassMixin):
|
||||
raise ValidationError(
|
||||
"Invalid project config: cannot have both 'tests' and 'data_tests' defined"
|
||||
)
|
||||
if "tests" in data:
|
||||
deprecations.warn(
|
||||
"project-test-config", deprecated_path="tests", exp_path="data_tests"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -341,17 +337,29 @@ class ProjectFlags(ExtensibleDbtClassMixin):
|
||||
warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None
|
||||
write_json: Optional[bool] = None
|
||||
|
||||
# legacy behaviors
|
||||
# legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md
|
||||
require_batched_execution_for_custom_microbatch_strategy: bool = False
|
||||
require_explicit_package_overrides_for_builtin_materializations: bool = True
|
||||
require_resource_names_without_spaces: bool = False
|
||||
source_freshness_run_project_hooks: bool = False
|
||||
skip_nodes_if_on_run_start_fails: bool = False
|
||||
state_modified_compare_more_unrendered_values: bool = False
|
||||
state_modified_compare_vars: bool = False
|
||||
require_yaml_configuration_for_mf_time_spines: bool = False
|
||||
require_nested_cumulative_type_params: bool = False
|
||||
|
||||
@property
|
||||
def project_only_flags(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"require_batched_execution_for_custom_microbatch_strategy": self.require_batched_execution_for_custom_microbatch_strategy,
|
||||
"require_explicit_package_overrides_for_builtin_materializations": self.require_explicit_package_overrides_for_builtin_materializations,
|
||||
"require_resource_names_without_spaces": self.require_resource_names_without_spaces,
|
||||
"source_freshness_run_project_hooks": self.source_freshness_run_project_hooks,
|
||||
"skip_nodes_if_on_run_start_fails": self.skip_nodes_if_on_run_start_fails,
|
||||
"state_modified_compare_more_unrendered_values": self.state_modified_compare_more_unrendered_values,
|
||||
"state_modified_compare_vars": self.state_modified_compare_vars,
|
||||
"require_yaml_configuration_for_mf_time_spines": self.require_yaml_configuration_for_mf_time_spines,
|
||||
"require_nested_cumulative_type_params": self.require_nested_cumulative_type_params,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,9 @@ from dbt.artifacts.schemas.base import VersionedSchema, schema_version
|
||||
from dbt.artifacts.schemas.results import ExecutionResult, TimingInfo
|
||||
from dbt.artifacts.schemas.run import RunExecutionResult, RunResult, RunResultsArtifact
|
||||
from dbt.contracts.graph.nodes import ResultNode
|
||||
from dbt.events.types import ArtifactWritten
|
||||
from dbt_common.dataclass_schema import dbtClassMixin
|
||||
from dbt_common.events.functions import fire_event
|
||||
|
||||
TaskTags = Optional[Dict[str, Any]]
|
||||
TaskID = uuid.UUID
|
||||
@@ -29,7 +31,8 @@ class RemoteCompileResult(RemoteCompileResultMixin):
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
@property
|
||||
def error(self):
|
||||
def error(self) -> None:
|
||||
# TODO: Can we delete this? It's never set anywhere else and never accessed
|
||||
return None
|
||||
|
||||
|
||||
@@ -40,7 +43,7 @@ class RemoteExecutionResult(ExecutionResult):
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
def write(self, path: str):
|
||||
def write(self, path: str) -> None:
|
||||
writable = RunResultsArtifact.from_execution_results(
|
||||
generated_at=self.generated_at,
|
||||
results=self.results,
|
||||
@@ -48,6 +51,7 @@ class RemoteExecutionResult(ExecutionResult):
|
||||
args=self.args,
|
||||
)
|
||||
writable.write(path)
|
||||
fire_event(ArtifactWritten(artifact_type=writable.__class__.__name__, artifact_path=path))
|
||||
|
||||
@classmethod
|
||||
def from_local_result(
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import abc
|
||||
from typing import ClassVar, Dict, List, Optional, Set
|
||||
from typing import Callable, ClassVar, Dict, List, Optional, Set
|
||||
|
||||
import dbt.tracking
|
||||
from dbt.events import types as core_types
|
||||
from dbt_common.events.functions import fire_event, warn_or_error
|
||||
from dbt_common.events.functions import warn_or_error
|
||||
|
||||
|
||||
class DBTDeprecation:
|
||||
@@ -98,24 +98,10 @@ class CollectFreshnessReturnSignature(DBTDeprecation):
|
||||
_event = "CollectFreshnessReturnSignature"
|
||||
|
||||
|
||||
class TestsConfigDeprecation(DBTDeprecation):
|
||||
_name = "project-test-config"
|
||||
_event = "TestsConfigDeprecation"
|
||||
|
||||
|
||||
class ProjectFlagsMovedDeprecation(DBTDeprecation):
|
||||
_name = "project-flags-moved"
|
||||
_event = "ProjectFlagsMovedDeprecation"
|
||||
|
||||
def show(self, *args, **kwargs) -> None:
|
||||
if self.name not in active_deprecations:
|
||||
event = self.event(**kwargs)
|
||||
# We can't do warn_or_error because the ProjectFlags
|
||||
# is where that is set up and we're just reading it.
|
||||
fire_event(event)
|
||||
self.track_deprecation_warn()
|
||||
active_deprecations.add(self.name)
|
||||
|
||||
|
||||
class PackageMaterializationOverrideDeprecation(DBTDeprecation):
|
||||
_name = "package-materialization-override"
|
||||
@@ -132,6 +118,21 @@ class SourceFreshnessProjectHooksNotRun(DBTDeprecation):
|
||||
_event = "SourceFreshnessProjectHooksNotRun"
|
||||
|
||||
|
||||
class MFTimespineWithoutYamlConfigurationDeprecation(DBTDeprecation):
|
||||
_name = "mf-timespine-without-yaml-configuration"
|
||||
_event = "MFTimespineWithoutYamlConfigurationDeprecation"
|
||||
|
||||
|
||||
class MFCumulativeTypeParamsDeprecation(DBTDeprecation):
|
||||
_name = "mf-cumulative-type-params-deprecation"
|
||||
_event = "MFCumulativeTypeParamsDeprecation"
|
||||
|
||||
|
||||
class MicrobatchMacroOutsideOfBatchesDeprecation(DBTDeprecation):
|
||||
_name = "microbatch-macro-outside-of-batches-deprecation"
|
||||
_event = "MicrobatchMacroOutsideOfBatchesDeprecation"
|
||||
|
||||
|
||||
def renamed_env_var(old_name: str, new_name: str):
|
||||
class EnvironmentVariableRenamed(DBTDeprecation):
|
||||
_name = f"environment-variable-renamed:{old_name}"
|
||||
@@ -147,7 +148,7 @@ def renamed_env_var(old_name: str, new_name: str):
|
||||
return cb
|
||||
|
||||
|
||||
def warn(name, *args, **kwargs):
|
||||
def warn(name: str, *args, **kwargs) -> None:
|
||||
if name not in deprecations:
|
||||
# this should (hopefully) never happen
|
||||
raise RuntimeError("Error showing deprecation warning: {}".format(name))
|
||||
@@ -155,6 +156,13 @@ def warn(name, *args, **kwargs):
|
||||
deprecations[name].show(*args, **kwargs)
|
||||
|
||||
|
||||
def buffer(name: str, *args, **kwargs):
|
||||
def show_callback():
|
||||
deprecations[name].show(*args, **kwargs)
|
||||
|
||||
buffered_deprecations.append(show_callback)
|
||||
|
||||
|
||||
# these are globally available
|
||||
# since modules are only imported once, active_deprecations is a singleton
|
||||
|
||||
@@ -169,15 +177,24 @@ deprecations_list: List[DBTDeprecation] = [
|
||||
ConfigLogPathDeprecation(),
|
||||
ConfigTargetPathDeprecation(),
|
||||
CollectFreshnessReturnSignature(),
|
||||
TestsConfigDeprecation(),
|
||||
ProjectFlagsMovedDeprecation(),
|
||||
PackageMaterializationOverrideDeprecation(),
|
||||
ResourceNamesWithSpacesDeprecation(),
|
||||
SourceFreshnessProjectHooksNotRun(),
|
||||
MFTimespineWithoutYamlConfigurationDeprecation(),
|
||||
MFCumulativeTypeParamsDeprecation(),
|
||||
MicrobatchMacroOutsideOfBatchesDeprecation(),
|
||||
]
|
||||
|
||||
deprecations: Dict[str, DBTDeprecation] = {d.name: d for d in deprecations_list}
|
||||
|
||||
buffered_deprecations: List[Callable] = []
|
||||
|
||||
|
||||
def reset_deprecations():
|
||||
active_deprecations.clear()
|
||||
|
||||
|
||||
def fire_buffered_deprecations():
|
||||
[dep_fn() for dep_fn in buffered_deprecations]
|
||||
buffered_deprecations.clear()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Events Module
|
||||
The Events module is responsible for communicating internal dbt structures into a consumable interface. Because the "event" classes are based entirely on protobuf definitions, the interface is really clearly defined, whether or not protobufs are used to consume it. We use Betterproto for compiling the protobuf message definitions into Python classes.
|
||||
The Events module is responsible for communicating internal dbt structures into a consumable interface. Because the "event" classes are based entirely on protobuf definitions, the interface is really clearly defined, whether or not protobufs are used to consume it. We use protoc for compiling the protobuf message definitions into Python classes.
|
||||
|
||||
# Using the Events Module
|
||||
The event module provides types that represent what is happening in dbt in `events.types`. These types are intended to represent an exhaustive list of all things happening within dbt that will need to be logged, streamed, or printed. To fire an event, `common.events.functions::fire_event` is the entry point to the module from everywhere in dbt.
|
||||
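As a rough usage sketch of that entry point (module paths are the ones referenced elsewhere in this diff; the event class and model name are illustrative only):

```python
# Minimal sketch, assuming these import paths; the model name is a placeholder.
from dbt.events.types import MicrobatchModelNoEventTimeInputs
from dbt_common.events.functions import fire_event

# Wrapped event classes take keyword-only arguments that mirror the fields of
# their protobuf message (here: `string model_name = 1`).
fire_event(MicrobatchModelNoEventTimeInputs(model_name="stg_orders"))
```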
@@ -8,14 +8,14 @@ The event module provides types that represent what is happening in dbt in `even
|
||||
When events are processed via `fire_event`, nearly everything is logged. Whether or not the user has enabled the debug flag, all debug messages are still logged to the file. However, some events are particularly time-consuming to construct because they return a huge amount of data. Today, the only messages in this category are cache events, and they are only logged if the `--log-cache-events` flag is on. This is important because these messages should not be created unless they are going to be logged, because they cause a noticeable performance degradation. These events use the "fire_event_if" functions.
|
||||
|
||||
# Adding a New Event
|
||||
* Add a new message in types.proto, and a second message with the same name + "Msg". The "Msg" message should have two fields, an "info" field of EventInfo, and a "data" field referring to the message name without "Msg"
|
||||
* Add a new message in `core_types.proto`, and a second message with the same name + "Msg". The "Msg" message should have two fields, an "info" field of EventInfo, and a "data" field referring to the message name without "Msg"
|
||||
* Run the protoc compiler to update `core_types_pb2.py`: `make core_proto_types`
|
||||
* Add a wrapping class in `core/dbt/events/types.py` with a Level superclass plus `code` and `message` methods (a rough sketch follows this list)
|
||||
* Add the class to tests/unit/test_events.py
|
||||
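A rough sketch of the wrapping-class step above; the event name, code, and message are placeholders, and the base-class import path is an assumption based on the existing Level classes:

```python
# Hypothetical wrapping class for core/dbt/events/types.py; "MyNewEvent" and
# the "Z999" code are illustrative only, not part of this change.
from dbt.events.base_types import WarnLevel  # assumed location of the Level base classes


class MyNewEvent(WarnLevel):
    def code(self) -> str:
        return "Z999"  # placeholder event code

    def message(self) -> str:
        # `model_name` would mirror a `string model_name = 1` field on the
        # corresponding proto message.
        return f"Something noteworthy happened to '{self.model_name}'."
```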
|
||||
We have switched from using betterproto to using google protobuf, because of a lack of support for Struct fields in betterproto.
|
||||
|
||||
The google protobuf interface is janky and very much non-Pythonic. The "generated" classes in types_pb2.py do not resemble regular Python classes. They do not have normal constructors; they can only be constructed empty. They can be "filled" by setting fields individually or using a json_format method like ParseDict. We have wrapped the logging events with a class (in types.py) which allows using a constructor -- keywords only, no positional parameters.
|
||||
The google protobuf interface is janky and very much non-Pythonic. The "generated" classes in types_pb2.py do not resemble regular Python classes. They do not have normal constructors; they can only be constructed empty. They can be "filled" by setting fields individually or using a json_format method like ParseDict. We have wrapped the logging events with a class (in types.py) which allows using a constructor -- keywords only, no positional parameters.
|
||||
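For contrast, a minimal sketch of filling a generated message directly; the generated module is assumed to be importable as `dbt.events.core_types_pb2`, and the payload is a placeholder:

```python
# The raw generated classes are constructed empty and then filled, either
# field-by-field or from a dict via ParseDict.
from google.protobuf.json_format import ParseDict

from dbt.events import core_types_pb2  # assumed import path for the generated module

msg = core_types_pb2.MicrobatchModelNoEventTimeInputs()
msg.model_name = "stg_orders"  # fill fields individually

msg_from_dict = ParseDict(
    {"model_name": "stg_orders"},
    core_types_pb2.MicrobatchModelNoEventTimeInputs(),
)
```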
|
||||
## Required for Every Event
|
||||
|
||||
@@ -37,6 +37,6 @@ class PartialParsingDeletedExposure(DebugLevel):
|
||||
|
||||
## Compiling core_types.proto
|
||||
|
||||
After adding a new message in `types.proto`, either:
|
||||
After adding a new message in `core_types.proto`, either:
|
||||
- In the repository root directory: `make core_proto_types`
|
||||
- In the `core/dbt/events` directory: `protoc -I=. --python_out=. core_types.proto`
|
||||
|
||||
@@ -445,6 +445,30 @@ message SourceFreshnessProjectHooksNotRunMsg {
|
||||
SourceFreshnessProjectHooksNotRun data = 2;
|
||||
}
|
||||
|
||||
// D018
|
||||
message MFTimespineWithoutYamlConfigurationDeprecation {}
|
||||
|
||||
message MFTimespineWithoutYamlConfigurationDeprecationMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MFTimespineWithoutYamlConfigurationDeprecation data = 2;
|
||||
}
|
||||
|
||||
// D019
|
||||
message MFCumulativeTypeParamsDeprecation {}
|
||||
|
||||
message MFCumulativeTypeParamsDeprecationMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MFCumulativeTypeParamsDeprecation data = 2;
|
||||
}
|
||||
|
||||
// D020
|
||||
message MicrobatchMacroOutsideOfBatchesDeprecation {}
|
||||
|
||||
message MicrobatchMacroOutsideOfBatchesDeprecationMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MicrobatchMacroOutsideOfBatchesDeprecation data = 2;
|
||||
}
|
||||
|
||||
// I065
|
||||
message DeprecatedModel {
|
||||
string model_name = 1;
|
||||
@@ -893,6 +917,17 @@ message FreshnessConfigProblemMsg {
|
||||
}
|
||||
|
||||
|
||||
// I074
|
||||
message MicrobatchModelNoEventTimeInputs {
|
||||
string model_name = 1;
|
||||
}
|
||||
|
||||
message MicrobatchModelNoEventTimeInputsMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MicrobatchModelNoEventTimeInputs data = 2;
|
||||
}
|
||||
|
||||
|
||||
// M - Deps generation
|
||||
|
||||
|
||||
@@ -1217,6 +1252,19 @@ message DepsScrubbedPackageNameMsg{
|
||||
DepsScrubbedPackageName data = 2;
|
||||
}
|
||||
|
||||
// P - Artifacts
|
||||
|
||||
// P001
|
||||
message ArtifactWritten {
|
||||
string artifact_type = 1;
|
||||
string artifact_path = 2;
|
||||
}
|
||||
|
||||
message ArtifactWrittenMsg {
|
||||
CoreEventInfo info = 1;
|
||||
ArtifactWritten data = 2;
|
||||
}
|
||||
|
||||
// Q - Node execution
|
||||
|
||||
// Q001
|
||||
@@ -1271,6 +1319,12 @@ message SQLRunnerExceptionMsg {
|
||||
SQLRunnerException data = 2;
|
||||
}
|
||||
|
||||
message Group {
|
||||
string name = 1;
|
||||
string package_name = 3;
|
||||
map<string, string> owner = 7;
|
||||
}
|
||||
|
||||
// Q007
|
||||
message LogTestResult {
|
||||
NodeInfo node_info = 1;
|
||||
@@ -1280,6 +1334,8 @@ message LogTestResult {
|
||||
int32 num_models = 5;
|
||||
float execution_time = 6;
|
||||
int32 num_failures = 7;
|
||||
Group group = 8;
|
||||
string attached_node = 9;
|
||||
}
|
||||
|
||||
message LogTestResultMsg {
|
||||
@@ -1312,6 +1368,7 @@ message LogModelResult {
|
||||
int32 index = 4;
|
||||
int32 total = 5;
|
||||
float execution_time = 6;
|
||||
Group group = 7;
|
||||
}
|
||||
|
||||
message LogModelResultMsg {
|
||||
@@ -1373,7 +1430,7 @@ message LogFreshnessResultMsg {
|
||||
LogFreshnessResult data = 2;
|
||||
}
|
||||
|
||||
// Q018
|
||||
// Q019
|
||||
message LogNodeNoOpResult {
|
||||
NodeInfo node_info = 1;
|
||||
string description = 2;
|
||||
@@ -1589,6 +1646,7 @@ message ShowNode {
|
||||
bool is_inline = 3;
|
||||
string output_format = 4;
|
||||
string unique_id = 5;
|
||||
bool quiet = 6;
|
||||
}
|
||||
|
||||
message ShowNodeMsg {
|
||||
@@ -1603,6 +1661,7 @@ message CompiledNode {
|
||||
bool is_inline = 3;
|
||||
string output_format = 4;
|
||||
string unique_id = 5;
|
||||
bool quiet = 6;
|
||||
}
|
||||
|
||||
message CompiledNodeMsg {
|
||||
@@ -1610,6 +1669,27 @@ message CompiledNodeMsg {
|
||||
CompiledNode data = 2;
|
||||
}
|
||||
|
||||
// Q043
|
||||
message SnapshotTimestampWarning {
|
||||
string snapshot_time_data_type = 1;
|
||||
string updated_at_data_type = 2;
|
||||
}
|
||||
|
||||
message SnapshotTimestampWarningMsg {
|
||||
CoreEventInfo info = 1;
|
||||
SnapshotTimestampWarning data = 2;
|
||||
}
|
||||
|
||||
// Q044
|
||||
message MicrobatchExecutionDebug {
|
||||
string msg = 1;
|
||||
}
|
||||
|
||||
message MicrobatchExecutionDebugMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MicrobatchExecutionDebug data = 2;
|
||||
}
|
||||
|
||||
// W - Node testing
|
||||
|
||||
// Skipped W001
|
||||
@@ -1815,6 +1895,7 @@ message RunResultWarning {
|
||||
string node_name = 2;
|
||||
string path = 3;
|
||||
NodeInfo node_info = 4;
|
||||
Group group = 5;
|
||||
}
|
||||
|
||||
message RunResultWarningMsg {
|
||||
@@ -1828,6 +1909,7 @@ message RunResultFailure {
|
||||
string node_name = 2;
|
||||
string path = 3;
|
||||
NodeInfo node_info = 4;
|
||||
Group group = 5;
|
||||
}
|
||||
|
||||
message RunResultFailureMsg {
|
||||
@@ -1849,6 +1931,7 @@ message StatsLineMsg {
|
||||
message RunResultError {
|
||||
string msg = 1;
|
||||
NodeInfo node_info = 2;
|
||||
Group group = 3;
|
||||
}
|
||||
|
||||
message RunResultErrorMsg {
|
||||
@@ -1896,6 +1979,7 @@ message EndOfRunSummary {
|
||||
int32 num_errors = 1;
|
||||
int32 num_warnings = 2;
|
||||
bool keyboard_interrupt = 3;
|
||||
int32 num_partial_success = 4;
|
||||
}
|
||||
|
||||
message EndOfRunSummaryMsg {
|
||||
@@ -1903,7 +1987,19 @@ message EndOfRunSummaryMsg {
|
||||
EndOfRunSummary data = 2;
|
||||
}
|
||||
|
||||
// Skipped Z031, Z032, Z033
|
||||
// Skipped Z031, Z032
|
||||
|
||||
// Z033
|
||||
message MarkSkippedChildren {
|
||||
string unique_id = 1;
|
||||
string status = 2;
|
||||
RunResultMsg run_result = 3;
|
||||
}
|
||||
|
||||
message MarkSkippedChildrenMsg {
|
||||
CoreEventInfo info = 1;
|
||||
MarkSkippedChildren data = 2;
|
||||
}
|
||||
|
||||
// Z034
|
||||
message LogSkipBecauseError {
|
||||
@@ -1911,6 +2007,7 @@ message LogSkipBecauseError {
|
||||
string relation = 2;
|
||||
int32 index = 3;
|
||||
int32 total = 4;
|
||||
string status = 5;
|
||||
}
|
||||
|
||||
message LogSkipBecauseErrorMsg {
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -2,8 +2,10 @@ import os
|
||||
from functools import partial
|
||||
from typing import Callable, List
|
||||
|
||||
from dbt.tracking import track_behavior_change_warn
|
||||
from dbt_common.events.base_types import EventLevel, EventMsg
|
||||
from dbt_common.events.event_manager_client import (
|
||||
add_callback_to_manager,
|
||||
add_logger_to_manager,
|
||||
cleanup_event_logger,
|
||||
get_event_manager,
|
||||
@@ -68,15 +70,14 @@ def setup_event_logger(flags, callbacks: List[Callable[[EventMsg], None]] = [])
|
||||
make_log_dir_if_missing(flags.LOG_PATH)
|
||||
event_manager = get_event_manager()
|
||||
event_manager.callbacks = callbacks.copy()
|
||||
add_callback_to_manager(track_behavior_change_warn)
|
||||
|
||||
if flags.LOG_LEVEL != "none":
|
||||
line_format = _line_format_from_str(flags.LOG_FORMAT, LineFormat.PlainText)
|
||||
log_level = (
|
||||
EventLevel.ERROR
|
||||
if flags.QUIET
|
||||
else EventLevel.DEBUG
|
||||
if flags.DEBUG
|
||||
else EventLevel(flags.LOG_LEVEL)
|
||||
else EventLevel.DEBUG if flags.DEBUG else EventLevel(flags.LOG_LEVEL)
|
||||
)
|
||||
console_config = get_stdout_config(
|
||||
line_format,
|
||||
|
||||
@@ -388,6 +388,9 @@ class ConfigTargetPathDeprecation(WarnLevel):
|
||||
return line_wrap_message(warning_tag(f"Deprecated functionality\n\n{description}"))
|
||||
|
||||
|
||||
# Note: this deprecation has been removed, but we are leaving
|
||||
# the event class here, because users may have specified it in
|
||||
# warn_error_options.
|
||||
class TestsConfigDeprecation(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "D012"
|
||||
@@ -463,6 +466,36 @@ class SourceFreshnessProjectHooksNotRun(WarnLevel):
|
||||
return line_wrap_message(warning_tag(description))
|
||||
|
||||
|
||||
class MFTimespineWithoutYamlConfigurationDeprecation(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "D018"
|
||||
|
||||
def message(self) -> str:
|
||||
description = "Time spines without YAML configuration are in the process of deprecation. Please add YAML configuration for your 'metricflow_time_spine' model. See documentation on MetricFlow time spines: https://docs.getdbt.com/docs/build/metricflow-time-spine and behavior change documentation: https://docs.getdbt.com/reference/global-configs/behavior-changes."
|
||||
|
||||
return line_wrap_message(warning_tag(description))
|
||||
|
||||
|
||||
class MFCumulativeTypeParamsDeprecation(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "D019"
|
||||
|
||||
def message(self) -> str:
|
||||
description = "Cumulative fields `type_params.window` and `type_params.grain_to_date` have been moved and will soon be deprecated. Please nest those values under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes."
|
||||
|
||||
return line_wrap_message(warning_tag(description))
|
||||
|
||||
|
||||
class MicrobatchMacroOutsideOfBatchesDeprecation(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "D020"
|
||||
|
||||
def message(self) -> str:
|
||||
description = "The use of a custom microbatch macro outside of batched execution is deprecated. To use it with batched execution, set `flags.require_batched_execution_for_custom_microbatch_strategy` to `True` in `dbt_project.yml`. In the future this will be the default behavior."
|
||||
|
||||
return line_wrap_message(warning_tag(description))
|
||||
|
||||
|
||||
# =======================================================
|
||||
# I - Project parsing
|
||||
# =======================================================
|
||||
@@ -921,6 +954,19 @@ class FreshnessConfigProblem(WarnLevel):
|
||||
return self.msg
|
||||
|
||||
|
||||
class MicrobatchModelNoEventTimeInputs(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "I074"
|
||||
|
||||
def message(self) -> str:
|
||||
msg = (
|
||||
f"The microbatch model '{self.model_name}' has no 'ref' or 'source' input with an 'event_time' configuration. "
|
||||
"\nThis means no filtering can be applied and can result in unexpected duplicate records in the resulting microbatch model."
|
||||
)
|
||||
|
||||
return warning_tag(msg)
|
||||
|
||||
|
||||
# =======================================================
|
||||
# M - Deps generation
|
||||
# =======================================================
|
||||
@@ -1184,6 +1230,19 @@ class DepsScrubbedPackageName(WarnLevel):
|
||||
return f"Detected secret env var in {self.package_name}. dbt will write a scrubbed representation to the lock file. This will cause issues with subsequent 'dbt deps' using the lock file, requiring 'dbt deps --upgrade'"
|
||||
|
||||
|
||||
# =======================================================
|
||||
# P - Artifacts
|
||||
# =======================================================
|
||||
|
||||
|
||||
class ArtifactWritten(DebugLevel):
|
||||
def code(self):
|
||||
return "P001"
|
||||
|
||||
def message(self) -> str:
|
||||
return f"Wrote artifact {self.artifact_type} to {self.artifact_path}"
|
||||
|
||||
|
||||
# =======================================================
|
||||
# Q - Node execution
|
||||
# =======================================================
|
||||
@@ -1293,6 +1352,9 @@ class LogModelResult(DynamicLevel):
|
||||
if self.status == "error":
|
||||
info = "ERROR creating"
|
||||
status = red(self.status.upper())
|
||||
elif "PARTIAL SUCCESS" in self.status:
|
||||
info = "PARTIALLY created"
|
||||
status = yellow(self.status.upper())
|
||||
else:
|
||||
info = "OK created"
|
||||
status = green(self.status)
|
||||
@@ -1510,10 +1572,20 @@ class LogHookEndLine(InfoLevel):
|
||||
return "Q033"
|
||||
|
||||
def message(self) -> str:
|
||||
msg = f"OK hook: {self.statement}"
|
||||
if self.status == "success":
|
||||
info = "OK"
|
||||
status = green(info)
|
||||
elif self.status == "skipped":
|
||||
info = "SKIP"
|
||||
status = yellow(info)
|
||||
else:
|
||||
info = "ERROR"
|
||||
status = red(info)
|
||||
msg = f"{info} hook: {self.statement}"
|
||||
|
||||
return format_fancy_output_line(
|
||||
msg=msg,
|
||||
status=green(self.status),
|
||||
status=status,
|
||||
index=self.index,
|
||||
total=self.total,
|
||||
execution_time=self.execution_time,
|
||||
@@ -1591,7 +1663,9 @@ class ShowNode(InfoLevel):
|
||||
{"node": self.node_name, "show": json.loads(self.preview)}, indent=2
|
||||
)
|
||||
else:
|
||||
if self.is_inline:
|
||||
if self.quiet:
|
||||
return self.preview
|
||||
elif self.is_inline:
|
||||
return f"Previewing inline node:\n{self.preview}"
|
||||
else:
|
||||
return f"Previewing node '{self.node_name}':\n{self.preview}"
|
||||
@@ -1608,12 +1682,34 @@ class CompiledNode(InfoLevel):
|
||||
else:
|
||||
return json.dumps({"node": self.node_name, "compiled": self.compiled}, indent=2)
|
||||
else:
|
||||
if self.is_inline:
|
||||
if self.quiet:
|
||||
return self.compiled
|
||||
elif self.is_inline:
|
||||
return f"Compiled inline node is:\n{self.compiled}"
|
||||
else:
|
||||
return f"Compiled node '{self.node_name}' is:\n{self.compiled}"
|
||||
|
||||
|
||||
class SnapshotTimestampWarning(WarnLevel):
|
||||
def code(self) -> str:
|
||||
return "Q043"
|
||||
|
||||
def message(self) -> str:
|
||||
return (
|
||||
f"Data type of snapshot table timestamp columns ({self.snapshot_time_data_type}) "
|
||||
f"doesn't match derived column 'updated_at' ({self.updated_at_data_type}). "
|
||||
"Please update snapshot config 'updated_at'."
|
||||
)
|
||||
|
||||
|
||||
class MicrobatchExecutionDebug(DebugLevel):
|
||||
def code(self) -> str:
|
||||
return "Q044"
|
||||
|
||||
def message(self) -> str:
|
||||
return self.msg
|
||||
|
||||
|
||||
# =======================================================
|
||||
# W - Node testing
|
||||
# =======================================================
|
||||
@@ -1845,10 +1941,16 @@ class EndOfRunSummary(InfoLevel):
|
||||
def message(self) -> str:
|
||||
error_plural = pluralize(self.num_errors, "error")
|
||||
warn_plural = pluralize(self.num_warnings, "warning")
|
||||
partial_success_plural = f"""{self.num_partial_success} partial {"success" if self.num_partial_success == 1 else "successes"}"""
|
||||
|
||||
if self.keyboard_interrupt:
|
||||
message = yellow("Exited because of keyboard interrupt")
|
||||
elif self.num_errors > 0:
|
||||
message = red(f"Completed with {error_plural} and {warn_plural}:")
|
||||
message = red(
|
||||
f"Completed with {error_plural}, {partial_success_plural}, and {warn_plural}:"
|
||||
)
|
||||
elif self.num_partial_success > 0:
|
||||
message = yellow(f"Completed with {partial_success_plural} and {warn_plural}")
|
||||
elif self.num_warnings > 0:
|
||||
message = yellow(f"Completed with {warn_plural}:")
|
||||
else:
|
||||
@@ -1856,7 +1958,21 @@ class EndOfRunSummary(InfoLevel):
|
||||
return message
|
||||
|
||||
|
||||
# Skipped Z031, Z032, Z033
|
||||
# Skipped Z031, Z032
|
||||
|
||||
|
||||
class MarkSkippedChildren(DebugLevel):
|
||||
def code(self) -> str:
|
||||
return "Z033"
|
||||
|
||||
def message(self) -> str:
|
||||
msg = (
|
||||
f"Marking all children of '{self.unique_id}' to be skipped "
|
||||
f"because of status '{self.status}'. "
|
||||
)
|
||||
if self.run_result.message:
|
||||
msg = msg + f" Reason: {self.run_result.message}."
|
||||
return msg
|
||||
|
||||
|
||||
class LogSkipBecauseError(ErrorLevel):
|
||||
@@ -1864,7 +1980,7 @@ class LogSkipBecauseError(ErrorLevel):
|
||||
return "Z034"
|
||||
|
||||
def message(self) -> str:
|
||||
msg = f"SKIP relation {self.schema}.{self.relation} due to ephemeral model error"
|
||||
msg = f"SKIP relation {self.schema}.{self.relation} due to ephemeral model status '{self.status}'"
|
||||
return format_fancy_output_line(
|
||||
msg=msg, status=red("ERROR SKIP"), index=self.index, total=self.total
|
||||
)
|
||||
|
||||
@@ -136,6 +136,18 @@ class GraphDependencyNotFoundError(CompilationError):
|
||||
return msg
|
||||
|
||||
|
||||
class ForeignKeyConstraintToSyntaxError(CompilationError):
|
||||
def __init__(self, node, expression: str) -> None:
|
||||
self.expression = expression
|
||||
self.node = node
|
||||
super().__init__(msg=self.get_message())
|
||||
|
||||
def get_message(self) -> str:
|
||||
msg = f"'{self.node.unique_id}' defines a foreign key constraint 'to' expression which is not valid 'ref' or 'source' syntax: {self.expression}."
|
||||
|
||||
return msg
|
||||
|
||||
|
||||
# client level exceptions
|
||||
|
||||
|
||||
|
||||
@@ -68,6 +68,7 @@ def get_flag_dict():
|
||||
"target_path",
|
||||
"log_path",
|
||||
"invocation_command",
|
||||
"empty",
|
||||
}
|
||||
return {key: getattr(GLOBAL_FLAGS, key.upper(), None) for key in flag_attr}
|
||||
|
||||
|
||||
@@ -59,18 +59,40 @@ class Graph:
|
||||
def select_children(
|
||||
self, selected: Set[UniqueId], max_depth: Optional[int] = None
|
||||
) -> Set[UniqueId]:
|
||||
descendants: Set[UniqueId] = set()
|
||||
for node in selected:
|
||||
descendants.update(self.descendants(node, max_depth))
|
||||
return descendants
|
||||
"""Returns all nodes which are descendants of the 'selected' set.
|
||||
Nodes in the 'selected' set are counted as children only if
|
||||
they are descendants of other nodes in the 'selected' set."""
|
||||
children: Set[UniqueId] = set()
|
||||
i = 0
|
||||
while len(selected) > 0 and (max_depth is None or i < max_depth):
|
||||
next_layer: Set[UniqueId] = set()
|
||||
for node in selected:
|
||||
next_layer.update(self.descendants(node, 1))
|
||||
next_layer = next_layer - children # Avoid re-searching
|
||||
children.update(next_layer)
|
||||
selected = next_layer
|
||||
i += 1
|
||||
|
||||
return children
|
||||
|
||||
def select_parents(
|
||||
self, selected: Set[UniqueId], max_depth: Optional[int] = None
|
||||
) -> Set[UniqueId]:
|
||||
ancestors: Set[UniqueId] = set()
|
||||
for node in selected:
|
||||
ancestors.update(self.ancestors(node, max_depth))
|
||||
return ancestors
|
||||
"""Returns all nodes which are ancestors of the 'selected' set.
|
||||
Nodes in the 'selected' set are counted as parents only if
|
||||
they are ancestors of other nodes in the 'selected' set."""
|
||||
parents: Set[UniqueId] = set()
|
||||
i = 0
|
||||
while len(selected) > 0 and (max_depth is None or i < max_depth):
|
||||
next_layer: Set[UniqueId] = set()
|
||||
for node in selected:
|
||||
next_layer.update(self.ancestors(node, 1))
|
||||
next_layer = next_layer - parents # Avoid re-searching
|
||||
parents.update(next_layer)
|
||||
selected = next_layer
|
||||
i += 1
|
||||
|
||||
return parents
|
||||
|
||||
def select_successors(self, selected: Set[UniqueId]) -> Set[UniqueId]:
|
||||
successors: Set[UniqueId] = set()
|
||||
|
||||
@@ -25,8 +25,15 @@ class GraphQueue:
|
||||
the same time, as there is an unlocked race!
|
||||
"""
|
||||
|
||||
def __init__(self, graph: nx.DiGraph, manifest: Manifest, selected: Set[UniqueId]) -> None:
|
||||
self.graph = graph
|
||||
def __init__(
|
||||
self,
|
||||
graph: nx.DiGraph,
|
||||
manifest: Manifest,
|
||||
selected: Set[UniqueId],
|
||||
preserve_edges: bool = True,
|
||||
) -> None:
|
||||
# 'create_empty_copy' returns a copy of the graph G with all of the edges removed, and leaves nodes intact.
|
||||
self.graph = graph if preserve_edges else nx.classes.function.create_empty_copy(graph)
|
||||
self.manifest = manifest
|
||||
self._selected = selected
|
||||
# store the queue as a priority queue.
|
||||
|
||||
@@ -87,12 +87,15 @@ class NodeSelector(MethodManager):
|
||||
)
|
||||
return set(), set()
|
||||
|
||||
neighbors = self.collect_specified_neighbors(spec, collected)
|
||||
selected = collected | neighbors
|
||||
|
||||
# if --indirect-selection EMPTY, do not expand to adjacent tests
|
||||
if spec.indirect_selection == IndirectSelection.Empty:
|
||||
return collected, set()
|
||||
return selected, set()
|
||||
else:
|
||||
neighbors = self.collect_specified_neighbors(spec, collected)
|
||||
direct_nodes, indirect_nodes = self.expand_selection(
|
||||
selected=(collected | neighbors), indirect_selection=spec.indirect_selection
|
||||
selected=selected, indirect_selection=spec.indirect_selection
|
||||
)
|
||||
return direct_nodes, indirect_nodes
|
||||
|
||||
@@ -170,17 +173,22 @@ class NodeSelector(MethodManager):
|
||||
semantic_model = self.manifest.semantic_models[unique_id]
|
||||
return semantic_model.config.enabled
|
||||
elif unique_id in self.manifest.unit_tests:
|
||||
return True
|
||||
unit_test = self.manifest.unit_tests[unique_id]
|
||||
return unit_test.config.enabled
|
||||
elif unique_id in self.manifest.saved_queries:
|
||||
saved_query = self.manifest.saved_queries[unique_id]
|
||||
return saved_query.config.enabled
|
||||
|
||||
node = self.manifest.nodes[unique_id]
|
||||
|
||||
if self.include_empty_nodes:
|
||||
return node.config.enabled
|
||||
return node.config.enabled
|
||||
|
||||
def _is_empty_node(self, unique_id: UniqueId) -> bool:
|
||||
if unique_id in self.manifest.nodes:
|
||||
node = self.manifest.nodes[unique_id]
|
||||
return node.empty
|
||||
else:
|
||||
return not node.empty and node.config.enabled
|
||||
return False
|
||||
|
||||
def node_is_match(self, node: GraphMemberNode) -> bool:
|
||||
"""Determine if a node is a match for the selector. Non-match nodes
|
||||
@@ -212,7 +220,12 @@ class NodeSelector(MethodManager):
|
||||
"""Return the subset of selected nodes that is a match for this
|
||||
selector.
|
||||
"""
|
||||
return {unique_id for unique_id in selected if self._is_match(unique_id)}
|
||||
return {
|
||||
unique_id
|
||||
for unique_id in selected
|
||||
if self._is_match(unique_id)
|
||||
and (self.include_empty_nodes or not self._is_empty_node(unique_id))
|
||||
}
|
||||
|
||||
def expand_selection(
|
||||
self,
|
||||
@@ -319,18 +332,18 @@ class NodeSelector(MethodManager):
|
||||
|
||||
return filtered_nodes
|
||||
|
||||
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
|
||||
def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
|
||||
"""Returns a queue over nodes in the graph that tracks progress of
|
||||
dependecies.
|
||||
dependencies.
|
||||
"""
|
||||
# Filtering hapens in get_selected
|
||||
# Filtering happens in get_selected
|
||||
selected_nodes = self.get_selected(spec)
|
||||
# Save to global variable
|
||||
selected_resources.set_selected_resources(selected_nodes)
|
||||
# Construct a new graph using the selected_nodes
|
||||
new_graph = self.full_graph.get_subset_graph(selected_nodes)
|
||||
# should we give a way here for consumers to mutate the graph?
|
||||
return GraphQueue(new_graph.graph, self.manifest, selected_nodes)
|
||||
return GraphQueue(new_graph.graph, self.manifest, selected_nodes, preserve_edges)
|
||||
|
||||
|
||||
class ResourceTypeSelector(NodeSelector):
|
||||
|
||||
@@ -109,7 +109,7 @@ def is_selected_node(fqn: List[str], node_selector: str, is_versioned: bool) ->
|
||||
|
||||
|
||||
SelectorTarget = Union[
|
||||
SourceDefinition, ManifestNode, Exposure, Metric, SemanticModel, UnitTestDefinition
|
||||
SourceDefinition, ManifestNode, Exposure, Metric, SemanticModel, UnitTestDefinition, SavedQuery
|
||||
]
|
||||
|
||||
|
||||
@@ -202,6 +202,7 @@ class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
self.metric_nodes(included_nodes),
|
||||
self.unit_tests(included_nodes),
|
||||
self.semantic_model_nodes(included_nodes),
|
||||
self.saved_query_nodes(included_nodes),
|
||||
)
|
||||
|
||||
def configurable_nodes(
|
||||
@@ -680,7 +681,8 @@ class StateSelectorMethod(SelectorMethod):
|
||||
self, old: Optional[SelectorTarget], new: SelectorTarget, adapter_type: str
|
||||
) -> bool:
|
||||
if isinstance(
|
||||
new, (SourceDefinition, Exposure, Metric, SemanticModel, UnitTestDefinition)
|
||||
new,
|
||||
(SourceDefinition, Exposure, Metric, SemanticModel, UnitTestDefinition, SavedQuery),
|
||||
):
|
||||
# these all overwrite `same_contents`
|
||||
different_contents = not new.same_contents(old) # type: ignore
|
||||
@@ -719,7 +721,9 @@ class StateSelectorMethod(SelectorMethod):
|
||||
) -> Callable[[Optional[SelectorTarget], SelectorTarget], bool]:
|
||||
# get a function that compares two selector target based on compare method provided
|
||||
def check_modified_contract(old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
|
||||
if hasattr(new, compare_method):
|
||||
if new is None and hasattr(old, compare_method + "_removed"):
|
||||
return getattr(old, compare_method + "_removed")()
|
||||
elif hasattr(new, compare_method):
|
||||
# when old body does not exist or old and new are not the same
|
||||
return not old or not getattr(new, compare_method)(old, adapter_type) # type: ignore
|
||||
else:
|
||||
@@ -773,6 +777,8 @@ class StateSelectorMethod(SelectorMethod):
|
||||
previous_node = SemanticModel.from_resource(manifest.semantic_models[unique_id])
|
||||
elif unique_id in manifest.unit_tests:
|
||||
previous_node = UnitTestDefinition.from_resource(manifest.unit_tests[unique_id])
|
||||
elif unique_id in manifest.saved_queries:
|
||||
previous_node = SavedQuery.from_resource(manifest.saved_queries[unique_id])
|
||||
|
||||
keyword_args = {}
|
||||
if checker.__name__ in [
|
||||
@@ -785,6 +791,22 @@ class StateSelectorMethod(SelectorMethod):
|
||||
if checker(previous_node, node, **keyword_args): # type: ignore
|
||||
yield unique_id
|
||||
|
||||
# checkers that can handle removed nodes
|
||||
if checker.__name__ in ["check_modified_contract"]:
|
||||
# ignore included_nodes, since those cannot contain removed nodes
|
||||
for previous_unique_id, previous_node in manifest.nodes.items():
|
||||
# detect removed (deleted, renamed, or disabled) nodes
|
||||
removed_node = None
|
||||
if previous_unique_id in self.manifest.disabled.keys():
|
||||
removed_node = self.manifest.disabled[previous_unique_id][0]
|
||||
elif previous_unique_id not in self.manifest.nodes.keys():
|
||||
removed_node = previous_node
|
||||
|
||||
if removed_node:
|
||||
# do not yield -- removed nodes should never be selected for downstream execution
|
||||
# as they are not part of the current project's manifest.nodes
|
||||
checker(removed_node, None, **keyword_args) # type: ignore
|
||||
|
||||
|
||||
class ResultSelectorMethod(SelectorMethod):
|
||||
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:
|
||||
|
||||
0 core/dbt/materializations/__init__.py Normal file
0 core/dbt/materializations/incremental/__init__.py Normal file
223 core/dbt/materializations/incremental/microbatch.py Normal file
@@ -0,0 +1,223 @@
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pytz
|
||||
|
||||
from dbt.artifacts.resources.types import BatchSize
|
||||
from dbt.artifacts.schemas.batch_results import BatchType
|
||||
from dbt.contracts.graph.nodes import ModelNode, NodeConfig
|
||||
from dbt.exceptions import DbtInternalError, DbtRuntimeError
|
||||
|
||||
|
||||
class MicrobatchBuilder:
|
||||
"""A utility class for building microbatch definitions associated with a specific model"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: ModelNode,
|
||||
is_incremental: bool,
|
||||
event_time_start: Optional[datetime],
|
||||
event_time_end: Optional[datetime],
|
||||
default_end_time: Optional[datetime] = None,
|
||||
):
|
||||
if model.config.incremental_strategy != "microbatch":
|
||||
raise DbtInternalError(
|
||||
f"Model '{model.name}' does not use 'microbatch' incremental_strategy."
|
||||
)
|
||||
self.model = model
|
||||
|
||||
if self.model.config.batch_size is None:
|
||||
raise DbtRuntimeError(
|
||||
f"Microbatch model '{self.model.name}' does not have a 'batch_size' config (one of {[batch_size.value for batch_size in BatchSize]}) specificed."
|
||||
)
|
||||
|
||||
self.is_incremental = is_incremental
|
||||
self.event_time_start = (
|
||||
event_time_start.replace(tzinfo=pytz.UTC) if event_time_start else None
|
||||
)
|
||||
self.event_time_end = event_time_end.replace(tzinfo=pytz.UTC) if event_time_end else None
|
||||
self.default_end_time = default_end_time or datetime.now(pytz.UTC)
|
||||
|
||||
def build_end_time(self):
|
||||
"""Defaults the end_time to the current time in UTC unless a non `None` event_time_end was provided"""
|
||||
end_time = self.event_time_end or self.default_end_time
|
||||
return MicrobatchBuilder.ceiling_timestamp(end_time, self.model.config.batch_size)
|
||||
|
||||
def build_start_time(self, checkpoint: Optional[datetime]):
|
||||
"""Create a start time based off the passed in checkpoint.
|
||||
|
||||
If the checkpoint is `None`, or this is the first run of a microbatch model, then the
|
||||
model's configured `begin` value will be returned, as a checkpoint is necessary
|
||||
to build a start time. This is because we build the start time relative to the checkpoint
|
||||
via the batchsize and offset, and we cannot offset a checkpoint if there is no checkpoint.
|
||||
"""
|
||||
assert isinstance(self.model.config, NodeConfig)
|
||||
batch_size = self.model.config.batch_size
|
||||
|
||||
# Use event_time_start if it is provided.
|
||||
if self.event_time_start:
|
||||
return MicrobatchBuilder.truncate_timestamp(self.event_time_start, batch_size)
|
||||
|
||||
# First run, use model's configured 'begin' as start.
|
||||
if not self.is_incremental or checkpoint is None:
|
||||
if not self.model.config.begin:
|
||||
raise DbtRuntimeError(
|
||||
f"Microbatch model '{self.model.name}' requires a 'begin' configuration."
|
||||
)
|
||||
|
||||
return MicrobatchBuilder.truncate_timestamp(self.model.config.begin, batch_size)
|
||||
|
||||
lookback = self.model.config.lookback
|
||||
|
||||
# If the checkpoint is equivalent to itself truncated then the checkpoint straddles
|
||||
# the batch line. In this case the last batch will end with the checkpoint, but start
|
||||
# should be the previous hour/day/month/year. Thus we need to increase the lookback by
|
||||
# 1 to get this effect properly.
|
||||
if checkpoint == MicrobatchBuilder.truncate_timestamp(checkpoint, batch_size):
|
||||
lookback += 1
|
||||
|
||||
return MicrobatchBuilder.offset_timestamp(checkpoint, batch_size, -1 * lookback)
|
||||
|
||||
def build_batches(self, start: datetime, end: datetime) -> List[BatchType]:
|
||||
"""
|
||||
Given a start and end datetime, builds a list of batches where each batch is
|
||||
the size of the model's batch_size.
|
||||
"""
|
||||
batch_size = self.model.config.batch_size
|
||||
curr_batch_start: datetime = start
|
||||
curr_batch_end: datetime = MicrobatchBuilder.offset_timestamp(
|
||||
curr_batch_start, batch_size, 1
|
||||
)
|
||||
|
||||
batches: List[BatchType] = [(curr_batch_start, curr_batch_end)]
|
||||
while curr_batch_end < end:
|
||||
curr_batch_start = curr_batch_end
|
||||
curr_batch_end = MicrobatchBuilder.offset_timestamp(curr_batch_start, batch_size, 1)
|
||||
batches.append((curr_batch_start, curr_batch_end))
|
||||
|
||||
# use exact end value as stop
|
||||
batches[-1] = (batches[-1][0], end)
|
||||
|
||||
return batches
|
||||
|
||||
def build_jinja_context_for_batch(self, incremental_batch: bool) -> Dict[str, Any]:
|
||||
"""
|
||||
Create context with entries that reflect microbatch model + incremental execution state
|
||||
|
||||
Assumes self.model has been (re)-compiled with necessary batch filters applied.
|
||||
"""
|
||||
jinja_context: Dict[str, Any] = {}
|
||||
|
||||
# Microbatch model properties
|
||||
jinja_context["model"] = self.model.to_dict()
|
||||
jinja_context["sql"] = self.model.compiled_code
|
||||
jinja_context["compiled_code"] = self.model.compiled_code
|
||||
|
||||
# Add incremental context variables for batches running incrementally
|
||||
if incremental_batch:
|
||||
jinja_context["is_incremental"] = lambda: True
|
||||
jinja_context["should_full_refresh"] = lambda: False
|
||||
|
||||
return jinja_context
|
||||
|
||||
@staticmethod
|
||||
def offset_timestamp(timestamp: datetime, batch_size: BatchSize, offset: int) -> datetime:
|
||||
"""Truncates the passed in timestamp based on the batch_size and then applies the offset by the batch_size.
|
||||
|
||||
Note: It's important to understand that the offset applies to the truncated timestamp, not
|
||||
the origin timestamp. Thus being offset by a day isn't relative to the any given hour that day,
|
||||
but relative to the start of the day. So if the timestamp is the very end of a day, 2024-09-17 23:59:59,
|
||||
you have a batch size of a day, and an offset of +1, then the returned value ends up being only one
|
||||
second later, 2024-09-18 00:00:00.
|
||||
|
||||
2024-09-17 16:06:00 + Batchsize.hour -1 -> 2024-09-17 15:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.hour +1 -> 2024-09-17 17:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.day -1 -> 2024-09-16 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.day +1 -> 2024-09-18 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.month -1 -> 2024-08-01 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.month +1 -> 2024-10-01 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.year -1 -> 2023-01-01 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.year +1 -> 2025-01-01 00:00:00
|
||||
"""
|
||||
truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size)
|
||||
|
||||
offset_timestamp: datetime
|
||||
if batch_size == BatchSize.hour:
|
||||
offset_timestamp = truncated + timedelta(hours=offset)
|
||||
elif batch_size == BatchSize.day:
|
||||
offset_timestamp = truncated + timedelta(days=offset)
|
||||
elif batch_size == BatchSize.month:
|
||||
offset_timestamp = truncated
|
||||
for _ in range(abs(offset)):
|
||||
if offset < 0:
|
||||
offset_timestamp = offset_timestamp - timedelta(days=1)
|
||||
else:
|
||||
offset_timestamp = offset_timestamp + timedelta(days=31)
|
||||
offset_timestamp = MicrobatchBuilder.truncate_timestamp(
|
||||
offset_timestamp, batch_size
|
||||
)
|
||||
elif batch_size == BatchSize.year:
|
||||
offset_timestamp = truncated.replace(year=truncated.year + offset)
|
||||
|
||||
return offset_timestamp
|
||||
|
||||
@staticmethod
|
||||
def truncate_timestamp(timestamp: datetime, batch_size: BatchSize) -> datetime:
|
||||
"""Truncates the passed in timestamp based on the batch_size.
|
||||
|
||||
2024-09-17 16:06:00 + Batchsize.hour -> 2024-09-17 16:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.day -> 2024-09-17 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.month -> 2024-09-01 00:00:00
|
||||
2024-09-17 16:06:00 + Batchsize.year -> 2024-01-01 00:00:00
|
||||
"""
|
||||
if batch_size == BatchSize.hour:
|
||||
truncated = datetime(
|
||||
timestamp.year,
|
||||
timestamp.month,
|
||||
timestamp.day,
|
||||
timestamp.hour,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
pytz.utc,
|
||||
)
|
||||
elif batch_size == BatchSize.day:
|
||||
truncated = datetime(
|
||||
timestamp.year, timestamp.month, timestamp.day, 0, 0, 0, 0, pytz.utc
|
||||
)
|
||||
elif batch_size == BatchSize.month:
|
||||
truncated = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0, 0, pytz.utc)
|
||||
elif batch_size == BatchSize.year:
|
||||
truncated = datetime(timestamp.year, 1, 1, 0, 0, 0, 0, pytz.utc)
|
||||
|
||||
return truncated
|
||||
|
||||
@staticmethod
|
||||
def batch_id(start_time: datetime, batch_size: BatchSize) -> str:
|
||||
return MicrobatchBuilder.format_batch_start(start_time, batch_size).replace("-", "")
|
||||
|
||||
@staticmethod
|
||||
def format_batch_start(batch_start: datetime, batch_size: BatchSize) -> str:
|
||||
return str(
|
||||
batch_start.date() if (batch_start and batch_size != BatchSize.hour) else batch_start
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def ceiling_timestamp(timestamp: datetime, batch_size: BatchSize) -> datetime:
|
||||
"""Takes the given timestamp and moves it to the ceiling for the given batch size
|
||||
|
||||
Note, if the timestamp is already the batch size ceiling, that is returned
|
||||
2024-09-17 16:06:00 + BatchSize.hour -> 2024-09-17 17:00:00
|
||||
2024-09-17 16:00:00 + BatchSize.hour -> 2024-09-17 16:00:00
|
||||
2024-09-17 16:06:00 + BatchSize.day -> 2024-09-18 00:00:00
|
||||
2024-09-17 00:00:00 + BatchSize.day -> 2024-09-17 00:00:00
|
||||
2024-09-17 16:06:00 + BatchSize.month -> 2024-10-01 00:00:00
|
||||
2024-09-01 00:00:00 + BatchSize.month -> 2024-09-01 00:00:00
|
||||
2024-09-17 16:06:00 + BatchSize.year -> 2025-01-01 00:00:00
|
||||
2024-01-01 00:00:00 + BatchSize.year -> 2024-01-01 00:00:00
|
||||
|
||||
"""
|
||||
ceiling = truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size)
|
||||
if truncated != timestamp:
|
||||
ceiling = MicrobatchBuilder.offset_timestamp(truncated, batch_size, 1)
|
||||
return ceiling
|
||||
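As a quick, hypothetical check of the truncation and offset helpers in the new `microbatch.py` file above (the module path is inferred from the file's location; the values mirror the docstring examples):

```python
# Standalone sketch exercising MicrobatchBuilder's static helpers.
from datetime import datetime

import pytz

from dbt.artifacts.resources.types import BatchSize
from dbt.materializations.incremental.microbatch import MicrobatchBuilder

ts = datetime(2024, 9, 17, 16, 6, 0, tzinfo=pytz.utc)

# Truncate to the start of the day, then offset the same timestamp by one month.
assert MicrobatchBuilder.truncate_timestamp(ts, BatchSize.day) == datetime(
    2024, 9, 17, tzinfo=pytz.utc
)
assert MicrobatchBuilder.offset_timestamp(ts, BatchSize.month, 1) == datetime(
    2024, 10, 1, tzinfo=pytz.utc
)
```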
@@ -26,6 +26,11 @@ REFABLE_NODE_TYPES: List["NodeType"] = [
|
||||
NodeType.Snapshot,
|
||||
]
|
||||
|
||||
TEST_NODE_TYPES: List["NodeType"] = [
|
||||
NodeType.Test,
|
||||
NodeType.Unit,
|
||||
]
|
||||
|
||||
VERSIONED_NODE_TYPES: List["NodeType"] = [
|
||||
NodeType.Model,
|
||||
]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.