Mirror of https://github.com/dbt-labs/dbt-core, synced 2025-12-19 03:51:27 +00:00

Compare commits: fix_test_c ... leahwicz-p (103 commits)
Commits (SHA1):

fc49828618, 611465b20c, 8f4777db5a, 06d87c0fe4, 00c0780b56, abe8e83945, 02cbae1f9f, 65908b395f, 4971395d5d, eeec2038aa, 4fac086556, 8818061d59, b195778eb9, de1763618a, 7485066ed4, 15ce956380, e5c63884e2, 9fef62d83e, 7563b997c2, 291ff3600b, 2c405304ee, 1e5a7878e5, d89e1d7f85, 98c015b775, a56502688f, c0d757ab19, e68fd6eb7f, 90edc38859, 0f018ea5dd, 1be6254363, 760af71ed2, 82f5e9f5b2, 988c187db3, b23129982c, 4d5d0e2150, c0c487bf77, 835d805079, c2a767184c, 1e7c8802eb, a76ec42586, 7418f36932, f9ef5e7e8e, dbfa351395, e775f2b38e, 6f27454be4, 201723d506, 17555faaca, 36e0ab9f42, 6017bd6cba, 30fed8d421, 8ac5cdd2e1, 114ac0793a, d0b750461a, 9693170eb9, bbab6c2361, cfe3636c78, aadf3c702e, 1eac726a07, 85e2c89794, fffcd3b404, fbfef4b1a3, 526a6c0d0c, 1f33b6a74a, 95fc6d43e7, d8c261ffcf, 66ea0a9e0f, 435b542e7b, 10cd06f515, 9da1868c3b, 2649fac4a4, 6e05226e3b, c1c3397f66, 2065db2383, 08fb868b63, 8d39ef16b6, 66c5082aa7, 26fb58bd1b, fed8826043, 9af78a3249, bf1ad6cd17, 15e995f2f5, b3e73b0de8, dd2633dfcb, 29f0278451, f0f98be692, 5956a64b01, 5fb36e3e2a, 9d295a1d91, 39f350fe89, 8c55e744b8, a260d4e25b, 509797588f, 2eed20f1f3, 1d7b4c0db2, ac8cd788cb, cacbd1c212, 3f78bb7819, aa65b01fe3, 4f0968d678, 118973cf79, df7cc0521f, 40c02d2cc9, be70b1a0c1
```diff
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.19.0
+current_version = 0.20.0rc1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
```
```diff
@@ -2,7 +2,7 @@ version: 2.1
 jobs:
   unit:
     docker: &test_only
-      - image: fishtownanalytics/test-container:11
+      - image: fishtownanalytics/test-container:12
         environment:
           DBT_INVOCATION_ENV: circle
           DOCKER_TEST_DATABASE_HOST: "database"
@@ -37,7 +37,7 @@ jobs:
       destination: dist
   integration-postgres:
     docker:
-      - image: fishtownanalytics/test-container:11
+      - image: fishtownanalytics/test-container:12
        environment:
          DBT_INVOCATION_ENV: circle
          DOCKER_TEST_DATABASE_HOST: "database"
```
.github/workflows/tests.yml (vendored, new file, 244 lines added):

```yaml
# This is a workflow to run our unit and integration tests for windows and mac

name: dbt Tests

# Triggers
on:
  # Triggers the workflow on push or pull request events and also adds a manual trigger
  push:
    branches:
      - 'develop'
      - '*.latest'
      - 'releases/*'
  pull_request_target:
    branches:
      - 'develop'
      - '*.latest'
      - 'pr/*'
      - 'releases/*'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  Linting:
    runs-on: ubuntu-latest #no need to run on every OS
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: '3.8'
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Linting'
        run: tox -e mypy,flake8 -- -v

  UnitTest:
    strategy:
      matrix:
        os: [windows-latest, ubuntu-latest, macos-latest]
        python-version: ['3.6', '3.7', '3.8']
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run unit tests'
        run: python -m tox -e py -- -v

  PostgresIntegrationTest-Windows:
    strategy:
      matrix:
        include:
          - python-version: '3.6'
            environment-version: 'py36-postgres'
          - python-version: '3.7'
            environment-version: 'py37-postgres'
          - python-version: '3.8'
            environment-version: 'py38-postgres'
    runs-on: 'windows-latest'
    environment: 'Postgres'
    needs: UnitTest
    steps:
      - uses: actions/checkout@v2
      - name: 'Install postgresql and set up database'
        shell: pwsh
        run: |
          $serviceName = Get-Service -Name postgresql*
          Set-Service -InputObject $serviceName -StartupType Automatic
          Start-Service -InputObject $serviceName
          & $env:PGBIN\createdb.exe -U postgres dbt
          & $env:PGBIN\psql.exe -U postgres -c "CREATE ROLE root WITH PASSWORD '$env:ROOT_PASSWORD';"
          & $env:PGBIN\psql.exe -U postgres -c "ALTER ROLE root WITH LOGIN;"
          & $env:PGBIN\psql.exe -U postgres -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;"
          & $env:PGBIN\psql.exe -U postgres -c "CREATE ROLE noaccess WITH PASSWORD '$env:NOACCESS_PASSWORD' NOSUPERUSER;"
          & $env:PGBIN\psql.exe -U postgres -c "ALTER ROLE noaccess WITH LOGIN;"
          & $env:PGBIN\psql.exe -U postgres -c "GRANT CONNECT ON DATABASE dbt TO noaccess;"
        env:
          ROOT_PASSWORD: ${{ secrets.ROOT_PASSWORD }}
          NOACCESS_PASSWORD: ${{ secrets.NOACCESS_PASSWORD }}

      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run integration tests'
        run: python -m tox -e ${{ matrix.environment-version }} -- -v -n4

  PostgresIntegrationTest-Mac:
    strategy:
      matrix:
        include:
          - python-version: '3.6'
            environment-version: 'py36-postgres'
          - python-version: '3.7'
            environment-version: 'py37-postgres'
          - python-version: '3.8'
            environment-version: 'py38-postgres'
    runs-on: 'macos-latest'
    environment: 'Postgres'
    needs: UnitTest
    steps:
      - uses: actions/checkout@v2
      - name: Start PostgreSQL on MacOS
        run: |
          brew services start postgresql
          echo "Check PostgreSQL service is running"
          i=10
          COMMAND='pg_isready'
          while [ $i -gt 0 ]; do
            echo "Check PostgreSQL service status"
            eval $COMMAND && break
            ((i--))
            if [ $i == 0 ]; then
              echo "PostgreSQL service not ready, all attempts exhausted"
              exit 1
            fi
            echo "PostgreSQL service not ready, wait 10 more sec, attempts left: $i"
            sleep 10
          done
      - name: Create users and DBs
        run: |
          psql --command="CREATE USER root PASSWORD '$env:ROOT_PASSWORD'" --command="\du" postgres
          createdb --owner=root dbt
          PGPASSWORD=$env:ROOT_PASSWORD psql --username=root --host=localhost --list dbt
          psql --command="CREATE USER noaccess PASSWORD '$env:NOACCESS_PASSWORD'" --command="\du" postgres
          psql --command="grant all privileges on database dbt to noaccess" --command="\du" postgres
          createdb --owner=root dbtMixedCase

      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run integration tests'
        run: tox -p -e ${{ matrix.environment-version }} -- -v -n4

  # These three are all similar except secure environment variables, which MUST be passed along to their tasks,
  # but there's probably a better way to do this!
  SnowflakeIntegrationTest:
    strategy:
      matrix:
        os: [windows-latest, macos-latest]
        python-version: ['3.6', '3.7', '3.8']
    runs-on: ${{ matrix.os }}
    environment: 'Snowflake'
    needs: UnitTest
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run integration tests'
        run: python -m tox -e py-snowflake -- -v -n4
        env:
          SNOWFLAKE_TEST_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT }}
          SNOWFLAKE_TEST_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD }}
          SNOWFLAKE_TEST_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
          SNOWFLAKE_TEST_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_WAREHOUSE }}
          SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN: ${{ secrets.SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN }}
          SNOWFLAKE_TEST_OAUTH_CLIENT_ID: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_ID }}
          SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET }}
          SNOWFLAKE_TEST_ALT_DATABASE: ${{ secrets.SNOWFLAKE_TEST_ALT_DATABASE }}
          SNOWFLAKE_TEST_ALT_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_ALT_WAREHOUSE }}
          SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
          SNOWFLAKE_TEST_QUOTED_DATABASE: ${{ secrets.SNOWFLAKE_TEST_QUOTED_DATABASE }}
          SNOWFLAKE_TEST_ROLE: ${{ secrets.SNOWFLAKE_TEST_ROLE }}

  BigQueryIntegrationTest:
    strategy:
      matrix:
        os: [windows-latest, macos-latest]
        python-version: ['3.6', '3.7', '3.8']
    runs-on: ${{ matrix.os }}
    environment: 'Bigquery'
    needs: UnitTest
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run integration tests'
        run: python -m tox -e py-bigquery -- -v -n4
        env:
          BIGQUERY_SERVICE_ACCOUNT_JSON: ${{ secrets.BIGQUERY_SERVICE_ACCOUNT_JSON }}
          BIGQUERY_TEST_ALT_DATABASE: ${{ secrets.BIGQUERY_TEST_ALT_DATABASE }}

  RedshiftIntegrationTest:
    strategy:
      matrix:
        os: [windows-latest, macos-latest]
        python-version: ['3.6', '3.7', '3.8']
    runs-on: ${{ matrix.os }}
    environment: 'Redshift'
    needs: UnitTest
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2.2.2
        with:
          python-version: ${{ matrix.python-version }}
          architecture: 'x64'

      - name: 'Install dependencies'
        run: python -m pip install --upgrade pip && pip install tox

      - name: 'Run integration tests'
        run: python -m tox -e py-redshift -- -v -n4
        env:
          REDSHIFT_TEST_DBNAME: ${{ secrets.REDSHIFT_TEST_DBNAME }}
          REDSHIFT_TEST_PASS: ${{ secrets.REDSHIFT_TEST_PASS }}
          REDSHIFT_TEST_USER: ${{ secrets.REDSHIFT_TEST_USER }}
          REDSHIFT_TEST_PORT: ${{ secrets.REDSHIFT_TEST_PORT }}
          REDSHIFT_TEST_HOST: ${{ secrets.REDSHIFT_TEST_HOST }}
```
CHANGELOG.md (118 changed lines):

```diff
@@ -1,19 +1,76 @@
 ## dbt 0.20.0 (Release TBD)

 ## dbt 0.20.0rc1 (June 04, 2021)


 ### Breaking changes
 - Fix adapter.dispatch macro resolution when statically extracting macros. Introduce new project-level `dispatch` config. The `packages` argument to `dispatch` no longer supports macro calls; there is backwards compatibility for existing packages. The argument will no longer be supported in a future release, instead provide the `macro_namespace` argument. ([#3362](https://github.com/fishtown-analytics/dbt/issues/3362), [#3363](https://github.com/fishtown-analytics/dbt/pull/3363), [#3383](https://github.com/fishtown-analytics/dbt/pull/3383), [#3403](https://github.com/fishtown-analytics/dbt/pull/3403))

 ### Features
 - Support optional `updated_at` config parameter with `check` strategy snapshots. If not supplied, will use current timestamp (default). ([#1844](https://github.com/fishtown-analytics/dbt/issues/1844), [#3376](https://github.com/fishtown-analytics/dbt/pull/3376))
 - Add the opt-in `--use-experimental-parser` flag ([#3307](https://github.com/fishtown-analytics/dbt/issues/3307), [#3374](https://github.com/fishtown-analytics/dbt/issues/3374))
 - Store test failures in the database ([#517](https://github.com/fishtown-analytics/dbt/issues/517), [#903](https://github.com/fishtown-analytics/dbt/issues/903), [#2593](https://github.com/fishtown-analytics/dbt/issues/2593), [#3316](https://github.com/fishtown-analytics/dbt/issues/3316))
 - Add new test configs: `where`, `limit`, `warn_if`, `error_if`, `fail_calc` ([#3258](https://github.com/fishtown-analytics/dbt/issues/3258), [#3321](https://github.com/fishtown-analytics/dbt/issues/3321), [#3336](https://github.com/fishtown-analytics/dbt/pull/3336))
 - Move partial parsing to end of parsing and implement new partial parsing method. ([#3217](https://github.com/fishtown-analytics/dbt/issues/3217), [#3364](https://github.com/fishtown-analytics/dbt/pull/3364))
 - Save doc file node references and use in partial parsing. ([#3425](https://github.com/fishtown-analytics/dbt/issues/3425), [#3432](https://github.com/fishtown-analytics/dbt/pull/3432))

 ### Fixes
 - Fix exit code from dbt debug not returning a failure when one of the tests fail ([#3017](https://github.com/fishtown-analytics/dbt/issues/3017))
 - Auto-generated CTEs in tests and ephemeral models have lowercase names to comply with dbt coding conventions ([#3027](https://github.com/fishtown-analytics/dbt/issues/3027), [#3028](https://github.com/fishtown-analytics/dbt/issues/3028))
 - Fix incorrect error message when a selector does not match any node [#3036](https://github.com/fishtown-analytics/dbt/issues/3036))
 - Fix variable `_dbt_max_partition` declaration and initialization for BigQuery incremental models ([#2940](https://github.com/fishtown-analytics/dbt/issues/2940), [#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
 - Moving from 'master' to 'HEAD' default branch in git ([#3057](https://github.com/fishtown-analytics/dbt/issues/3057), [#3104](https://github.com/fishtown-analytics/dbt/issues/3104), [#3117](https://github.com/fishtown-analytics/dbt/issues/3117)))
 - Requirement on `dataclasses` is relaxed to be between `>=0.6,<0.9` allowing dbt to cohabit with other libraries which required higher versions. ([#3150](https://github.com/fishtown-analytics/dbt/issues/3150), [#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
 - Add feature to add `_n` alias to same column names in SQL query ([#3147](https://github.com/fishtown-analytics/dbt/issues/3147), [#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
 - Raise a proper error message if dbt parses a macro twice due to macro duplication or misconfiguration. ([#2449](https://github.com/fishtown-analytics/dbt/issues/2449), [#3165](https://github.com/fishtown-analytics/dbt/pull/3165))
 - Fix exposures missing in graph context variable. ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
 - Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
 - Fix compiled sql for ephemeral models ([#3317](https://github.com/fishtown-analytics/dbt/issues/3317), [#3318](https://github.com/fishtown-analytics/dbt/pull/3318))
 - Now generating `run_results.json` even when no nodes are selected ([#3313](https://github.com/fishtown-analytics/dbt/issues/3313), [#3315](https://github.com/fishtown-analytics/dbt/pull/3315))
 - Add missing `packaging` dependency ([#3312](https://github.com/fishtown-analytics/dbt/issues/3312), [#3339](https://github.com/fishtown-analytics/dbt/pull/3339))
 - Fix references to macros with package names when rendering schema tests ([#3324](https://github.com/fishtown-analytics/dbt/issues/3324), [#3345](https://github.com/fishtown-analytics/dbt/pull/3345))
 - Stop clobbering default keyword arguments for jinja test definitions ([#3329](https://github.com/fishtown-analytics/dbt/issues/3329), [#3340](https://github.com/fishtown-analytics/dbt/pull/3340))
 - Fix unique_id generation for generic tests so tests with the same FQN but different configuration will run. ([#3254](https://github.com/fishtown-analytics/dbt/issues/3254), [#3335](https://github.com/fishtown-analytics/dbt/issues/3335))
 - Update the snowflake adapter to only comment on a column if it exists when using the persist_docs config ([#3039](https://github.com/fishtown-analytics/dbt/issues/3039), [#3149](https://github.com/fishtown-analytics/dbt/pull/3149))
 - Add a better error messages for undefined macros and when there are less packages installed than specified in `packages.yml`. ([#2999](https://github.com/fishtown-analytics/dbt/issues/2999))
 - Separate `compiled_path` from `build_path`, and print the former alongside node error messages ([#1985](https://github.com/fishtown-analytics/dbt/issues/1985), [#3327](https://github.com/fishtown-analytics/dbt/pull/3327))
 - Fix exception caused when running `dbt debug` with BigQuery connections ([#3314](https://github.com/fishtown-analytics/dbt/issues/3314), [#3351](https://github.com/fishtown-analytics/dbt/pull/3351))
 - Raise better error if snapshot is missing required configurations ([#3381](https://github.com/fishtown-analytics/dbt/issues/3381), [#3385](https://github.com/fishtown-analytics/dbt/pull/3385))
 - Fix `dbt run` errors caused from receiving non-JSON responses from Snowflake with Oauth ([#3350](https://github.com/fishtown-analytics/dbt/issues/3350)

 ### Docs
 - Reversed the rendering direction of relationship tests so that the test renders in the model it is defined in ([docs#181](https://github.com/fishtown-analytics/dbt-docs/issues/181), [docs#183](https://github.com/fishtown-analytics/dbt-docs/pull/183))
 - Support dots in model names: display them in the graphs ([docs#184](https://github.com/fishtown-analytics/dbt-docs/issues/184), [docs#185](https://github.com/fishtown-analytics/dbt-docs/issues/185))
 - Render meta tags for sources ([docs#192](https://github.com/fishtown-analytics/dbt-docs/issues/192), [docs#193](https://github.com/fishtown-analytics/dbt-docs/issues/193))

 ### Under the hood
 - Added logic for registry requests to raise a timeout error after a response hangs out for 30 seconds and 5 attempts have been made to reach the endpoint ([#3177](https://github.com/fishtown-analytics/dbt/issues/3177), [#3275](https://github.com/fishtown-analytics/dbt/pull/3275))
 - Added support for invoking the `list` task via the RPC server ([#3311](https://github.com/fishtown-analytics/dbt/issues/3311), [#3384](https://github.com/fishtown-analytics/dbt/pull/3384))
 - Added `unique_id` and `original_file_path` as keys to json responses from the `list` task ([#3356](https://github.com/fishtown-analytics/dbt/issues/3356), [#3384](https://github.com/fishtown-analytics/dbt/pull/3384))
 - Use shutil.which so Windows can pick up git.bat as a git executable ([#3035](https://github.com/fishtown-analytics/dbt/issues/3035), [#3134](https://github.com/fishtown-analytics/dbt/issues/3134))
 - Add `ssh-client` and update `git` version (using buster backports) in Docker image ([#3337](https://github.com/fishtown-analytics/dbt/issues/3337), [#3338](https://github.com/fishtown-analytics/dbt/pull/3338))
 - Add `tags` and `meta` properties to the exposure resource schema. ([#3404](https://github.com/fishtown-analytics/dbt/issues/3404), [#3405](https://github.com/fishtown-analytics/dbt/pull/3405))
 - Update test sub-query alias ([#3398](https://github.com/fishtown-analytics/dbt/issues/3398), [#3414](https://github.com/fishtown-analytics/dbt/pull/3414))
 - Bump schema versions for run results and manifest artifacts ([#3422](https://github.com/fishtown-analytics/dbt/issues/3422), [#3421](https://github.com/fishtown-analytics/dbt/pull/3421))
 - Add deprecation warning for using `packages` argument with `adapter.dispatch` ([#3419](https://github.com/fishtown-analytics/dbt/issues/3419), [#3420](https://github.com/fishtown-analytics/dbt/pull/3420))

 Contributors:
 - [@TeddyCr](https://github.com/TeddyCr) ([#3275](https://github.com/fishtown-analytics/dbt/pull/3275))
 - [@panasenco](https://github.com/panasenco) ([#3315](https://github.com/fishtown-analytics/dbt/pull/3315))
 - [@dmateusp](https://github.com/dmateusp) ([#3338](https://github.com/fishtown-analytics/dbt/pull/3338))
 - [@peiwangdb](https://github.com/peiwangdb) ([#3344](https://github.com/fishtown-analytics/dbt/pull/3344))
 - [@elikastelein](https://github.com/elikastelein) ([#3149](https://github.com/fishtown-analytics/dbt/pull/3149))
 - [@majidaldo](https://github.com/majidaldo) ([#3134](https://github.com/fishtown-analytics/dbt/issues/3134))
 - [@jaypeedevlin](https://github.com/jaypeedevlin) ([#2999](https://github.com/fishtown-analytics/dbt/issues/2999))
 - [@PJGaetan](https://github.com/PJGaetan) ([#3315](https://github.com/fishtown-analytics/dbt/pull/3376))
 - [@jnatkins](https://github.com/jnatkins) ([#3385](https://github.com/fishtown-analytics/dbt/pull/3385))
 - [@matt-winkler](https://github.com/matt-winkler) ([#3365](https://github.com/fishtown-analytics/dbt/pull/3365))
 - [@stkbailey](https://github.com/stkbailey) ([#3404](https://github.com/fishtown-analytics/dbt/pull/3405))
 - [@mascah](https://github.com/mascah) ([docs#181](https://github.com/fishtown-analytics/dbt-docs/issues/181), [docs#183](https://github.com/fishtown-analytics/dbt-docs/pull/183))
 - [@monti-python](https://github.com/monti-python) ([docs#184](https://github.com/fishtown-analytics/dbt-docs/issues/184))
 - [@diegodewilde](https://github.com/diegodewilde) ([docs#193](https://github.com/fishtown-analytics/dbt-docs/issues/193))

 ## dbt 0.20.0b1 (May 03, 2021)

 ### Breaking changes

 - Add Jinja tag for generic test definitions. Replacement for macros prefixed `test_` ([#1173](https://github.com/fishtown-analytics/dbt/issues/1173), [#3261](https://github.com/fishtown-analytics/dbt/pull/3261))
 - Update schema/generic tests to expect a set of rows instead of a single numeric value, and to use test materialization when executing. ([#3192](https://github.com/fishtown-analytics/dbt/issues/3192), [#3286](https://github.com/fishtown-analytics/dbt/pull/3286))
 - **Plugin maintainers:** For adapters that inherit from other adapters (e.g. `dbt-postgres` → `dbt-redshift`), `adapter.dispatch()` will now include parent macro implementations as viable candidates ([#2923](https://github.com/fishtown-analytics/dbt/issues/2923), [#3296](https://github.com/fishtown-analytics/dbt/pull/3296))

 ### Features
 - Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
 - Add optional `subdirectory` key to install dbt packages that are not hosted at the root of a Git repository ([#275](https://github.com/fishtown-analytics/dbt/issues/275), [#3267](https://github.com/fishtown-analytics/dbt/pull/3267))
 - Add optional configs for `require_partition_filter` and `partition_expiration_days` in BigQuery ([#1843](https://github.com/fishtown-analytics/dbt/issues/1843), [#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
 - Fix for EOL SQL comments prevent entire line execution ([#2731](https://github.com/fishtown-analytics/dbt/issues/2731), [#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
 - Add optional `merge_update_columns` config to specify columns to update for `merge` statements in BigQuery and Snowflake ([#1862](https://github.com/fishtown-analytics/dbt/issues/1862), [#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
@@ -21,12 +78,28 @@
 - Set application_name for Postgres connections ([#885](https://github.com/fishtown-analytics/dbt/issues/885), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
 - Support disabling schema tests, and configuring tests from `dbt_project.yml` ([#3252](https://github.com/fishtown-analytics/dbt/issues/3252),
 [#3253](https://github.com/fishtown-analytics/dbt/issues/3253), [#3257](https://github.com/fishtown-analytics/dbt/pull/3257))
 - Add Jinja tag for tests ([#1173](https://github.com/fishtown-analytics/dbt/issues/1173), [#3261](https://github.com/fishtown-analytics/dbt/pull/3261))
 - Add native support for Postgres index creation ([#804](https://github.com/fishtown-analytics/dbt/issues/804), [3106](https://github.com/fishtown-analytics/dbt/pull/3106))
 - Less greedy test selection: expand to select unselected tests if and only if all parents are selected ([#2891](https://github.com/fishtown-analytics/dbt/issues/2891), [#3235](https://github.com/fishtown-analytics/dbt/pull/3235))
 - Prevent locks in Redshift during full refresh in incremental materialization. ([#2426](https://github.com/fishtown-analytics/dbt/issues/2426), [#2998](https://github.com/fishtown-analytics/dbt/pull/2998))

 ### Fixes
 - Fix exit code from dbt debug not returning a failure when one of the tests fail ([#3017](https://github.com/fishtown-analytics/dbt/issues/3017), [#3018](https://github.com/fishtown-analytics/dbt/issues/3018))
 - Auto-generated CTEs in tests and ephemeral models have lowercase names to comply with dbt coding conventions ([#3027](https://github.com/fishtown-analytics/dbt/issues/3027), [#3028](https://github.com/fishtown-analytics/dbt/issues/3028))
 - Fix incorrect error message when a selector does not match any node [#3036](https://github.com/fishtown-analytics/dbt/issues/3036))
 - Fix variable `_dbt_max_partition` declaration and initialization for BigQuery incremental models ([#2940](https://github.com/fishtown-analytics/dbt/issues/2940), [#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
 - Moving from 'master' to 'HEAD' default branch in git ([#3057](https://github.com/fishtown-analytics/dbt/issues/3057), [#3104](https://github.com/fishtown-analytics/dbt/issues/3104), [#3117](https://github.com/fishtown-analytics/dbt/issues/3117)))
 - Requirement on `dataclasses` is relaxed to be between `>=0.6,<0.9` allowing dbt to cohabit with other libraries which required higher versions. ([#3150](https://github.com/fishtown-analytics/dbt/issues/3150), [#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
 - Add feature to add `_n` alias to same column names in SQL query ([#3147](https://github.com/fishtown-analytics/dbt/issues/3147), [#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
 - Raise a proper error message if dbt parses a macro twice due to macro duplication or misconfiguration. ([#2449](https://github.com/fishtown-analytics/dbt/issues/2449), [#3165](https://github.com/fishtown-analytics/dbt/pull/3165))
 - Fix exposures missing in graph context variable. ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241), [#3243](https://github.com/fishtown-analytics/dbt/issues/3243))
 - Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
 - Use absolute path for profiles directory instead of a path relative to the project directory. Note: If a user supplies a relative path to the profiles directory, the value of `args.profiles_dir` will still be absolute. ([#3133](https://github.com/fishtown-analytics/dbt/issues/3133), [#3176](https://github.com/fishtown-analytics/dbt/issues/3176))
 - Fix FQN selector unable to find models whose name contains dots ([#3246](https://github.com/fishtown-analytics/dbt/issues/3246), [#3247](https://github.com/fishtown-analytics/dbt/issues/3247))

 ### Under the hood
 - Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. ([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062))
 - Update script to collect and write json schema for dbt artifacts ([#2870](https://github.com/fishtown-analytics/dbt/issues/2870), [#3065](https://github.com/fishtown-analytics/dbt/pull/3065))
-- Relax Google Cloud dependency pins to major versions. ([#3156](https://github.com/fishtown-analytics/dbt/pull/3156)
+- Relax Google Cloud dependency pins to major versions. ([#3155](https://github.com/fishtown-analytics/dbt/pull/3156), [#3155](https://github.com/fishtown-analytics/dbt/pull/3156))
 - Bump `snowflake-connector-python` and releated dependencies, support Python 3.9 ([#2985](https://github.com/fishtown-analytics/dbt/issues/2985), [#3148](https://github.com/fishtown-analytics/dbt/pull/3148))
 - General development environment clean up and improve experience running tests locally ([#3194](https://github.com/fishtown-analytics/dbt/issues/3194), [#3204](https://github.com/fishtown-analytics/dbt/pull/3204), [#3228](https://github.com/fishtown-analytics/dbt/pull/3228))
 - Add a new materialization for tests, update data tests to use test materialization when executing. ([#3154](https://github.com/fishtown-analytics/dbt/issues/3154), [#3181](https://github.com/fishtown-analytics/dbt/pull/3181))
@@ -35,10 +108,10 @@

 Contributors:
 - [@yu-iskw](https://github.com/yu-iskw) ([#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
-- [@sdebruyn](https://github.com/sdebruyn) / [@lynxcare](https://github.com/lynxcare) ([#3018](https://github.com/fishtown-analytics/dbt/pull/3018))
+- [@sdebruyn](https://github.com/sdebruyn) ([#3018](https://github.com/fishtown-analytics/dbt/pull/3018))
 - [@rvacaru](https://github.com/rvacaru) ([#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
 - [@NiallRees](https://github.com/NiallRees) ([#3028](https://github.com/fishtown-analytics/dbt/pull/3028))
-- [ran-eh](https://github.com/ran-eh) ([#3036](https://github.com/fishtown-analytics/dbt/pull/3036))
+- [@ran-eh](https://github.com/ran-eh) ([#3036](https://github.com/fishtown-analytics/dbt/pull/3036))
 - [@pcasteran](https://github.com/pcasteran) ([#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
 - [@VasiliiSurov](https://github.com/VasiliiSurov) ([#3104](https://github.com/fishtown-analytics/dbt/pull/3104))
 - [@jmcarp](https://github.com/jmcarp) ([#3145](https://github.com/fishtown-analytics/dbt/pull/3145))
@@ -47,14 +120,25 @@ Contributors:
 - [@prratek](https://github.com/prratek) ([#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
 - [@techytushar](https://github.com/techytushar) ([#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
 - [@cgopalan](https://github.com/cgopalan) ([#3165](https://github.com/fishtown-analytics/dbt/pull/3165), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
 - [@fux](https://github.com/fuchsst) ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
 - [@dmateusp](https://github.com/dmateusp) ([#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
 - [@fux](https://github.com/fuchsst) ([#3243](https://github.com/fishtown-analytics/dbt/issues/3243))
 - [@arzavj](https://github.com/arzavj) ([3106](https://github.com/fishtown-analytics/dbt/pull/3106))
 - [@JCZuurmond](https://github.com/JCZuurmond) ([#3176](https://github.com/fishtown-analytics/dbt/pull/3176))
 - [@dmateusp](https://github.com/dmateusp) ([#3270](https://github.com/fishtown-analytics/dbt/pull/3270), [#3267](https://github.com/fishtown-analytics/dbt/pull/3267))
 - [@monti-python](https://github.com/monti-python) ([#3247](https://github.com/fishtown-analytics/dbt/issues/3247))
 - [@drkarthi](https://github.com/drkarthi) ([#2426](https://github.com/fishtown-analytics/dbt/issues/2426), [#2998](https://github.com/fishtown-analytics/dbt/pull/2998))

 ## dbt 0.19.2 (Release TBD)

 ### Breaking changes
 - Fix adapter.dispatch macro resolution when statically extracting macros. Introduce new project-level `dispatch` config. The `packages` argument to `dispatch` no longer supports macro calls; there is backwards compatibility for existing packages. The argument will no longer be supported in a future release, instead provide the `macro_namespace` argument. ([#3362](https://github.com/fishtown-analytics/dbt/issues/3362), [#3363](https://github.com/fishtown-analytics/dbt/pull/3363), [#3383](https://github.com/fishtown-analytics/dbt/pull/3383), [#3403](https://github.com/fishtown-analytics/dbt/pull/3403))

 ### Fixes
 - Fix references to macros with package names when rendering schema tests ([#3324](https://github.com/fishtown-analytics/dbt/issues/3324), [#3345](https://github.com/fishtown-analytics/dbt/pull/3345))

 ## dbt 0.19.1 (March 31, 2021)

 ## dbt 0.19.1rc2 (March 25, 2021)


 ### Fixes
 - Pass service-account scopes to gcloud-based oauth ([#3040](https://github.com/fishtown-analytics/dbt/issues/3040), [#3041](https://github.com/fishtown-analytics/dbt/pull/3041))
```
```diff
@@ -3,6 +3,9 @@ FROM ubuntu:18.04
 ENV DEBIAN_FRONTEND noninteractive

 RUN apt-get update \
+  && apt-get install -y --no-install-recommends \
+    software-properties-common \
+  && add-apt-repository ppa:git-core/ppa -y \
   && apt-get dist-upgrade -y \
   && apt-get install -y --no-install-recommends \
     netcat \
```
```diff
@@ -62,7 +62,7 @@ jobs:
 - job: SnowflakeIntegrationTest
   pool:
     vmImage: 'vs2017-win2016'
-  dependsOn: PostgresIntegrationTest
+  dependsOn: UnitTest
   condition: succeeded()
   steps:
     - task: UsePythonVersion@0
@@ -87,7 +87,7 @@ jobs:
 - job: BigQueryIntegrationTest
   pool:
     vmImage: 'vs2017-win2016'
-  dependsOn: PostgresIntegrationTest
+  dependsOn: UnitTest
   condition: succeeded()
   steps:
     - task: UsePythonVersion@0
@@ -104,7 +104,7 @@ jobs:
 - job: RedshiftIntegrationTest
   pool:
     vmImage: 'vs2017-win2016'
-  dependsOn: PostgresIntegrationTest
+  dependsOn: UnitTest
   condition: succeeded()
   steps:
     - task: UsePythonVersion@0
```
```diff
@@ -4,6 +4,7 @@ import os.path
 from dbt.clients.system import run_cmd, rmdir
 from dbt.logger import GLOBAL_LOGGER as logger
 import dbt.exceptions
+from packaging import version


 def _is_commit(revision: str) -> bool:
@@ -11,11 +12,22 @@ def _is_commit(revision: str) -> bool:
     return bool(re.match(r"\b[0-9a-f]{40}\b", revision))


-def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):
+def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirectory=None):
     has_revision = revision is not None
     is_commit = _is_commit(revision or "")

     clone_cmd = ['git', 'clone', '--depth', '1']
+    if subdirectory:
+        logger.debug(' Subdirectory specified: {}, using sparse checkout.'.format(subdirectory))
+        out, _ = run_cmd(cwd, ['git', '--version'], env={'LC_ALL': 'C'})
+        git_version = version.parse(re.search(r"\d+\.\d+\.\d+", out.decode("utf-8")).group(0))
+        if not git_version >= version.parse("2.25.0"):
+            # 2.25.0 introduces --sparse
+            raise RuntimeError(
+                "Please update your git version to pull a dbt package "
+                "from a subdirectory: your version is {}, >= 2.25.0 needed".format(git_version)
+            )
+        clone_cmd.extend(['--filter=blob:none', '--sparse'])

     if has_revision and not is_commit:
         clone_cmd.extend(['--branch', revision])
@@ -24,9 +36,11 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):

     if dirname is not None:
         clone_cmd.append(dirname)

     result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})

+    if subdirectory:
+        run_cmd(os.path.join(cwd, dirname or ''), ['git', 'sparse-checkout', 'set', subdirectory])
+
     if remove_git_dir:
         rmdir(os.path.join(dirname, '.git'))
@@ -84,11 +98,16 @@ def remove_remote(cwd):


 def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
-                       revision=None):
+                       revision=None, subdirectory=None):
     exists = None
     try:
-        _, err = clone(repo, cwd, dirname=dirname,
-                       remove_git_dir=remove_git_dir)
+        _, err = clone(
+            repo,
+            cwd,
+            dirname=dirname,
+            remove_git_dir=remove_git_dir,
+            subdirectory=subdirectory,
+        )
     except dbt.exceptions.CommandResultError as exc:
         err = exc.stderr.decode('utf-8')
         exists = re.match("fatal: destination path '(.+)' already exists", err)
@@ -120,4 +139,4 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
                      start_sha[:7], end_sha[:7])
     else:
         logger.debug(' Checked out at {}.', end_sha[:7])
-    return directory
+    return os.path.join(directory, subdirectory or '')
```
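The `subdirectory` support added above rests on git's partial clone plus sparse checkout (git >= 2.25), followed by `git sparse-checkout set`. Below is a minimal standalone sketch of that same command sequence using `subprocess`, independent of dbt's `run_cmd` helper; the repository URL, subdirectory, and destination are hypothetical placeholders, not values from this diff.

```python
import os
import subprocess


def sparse_clone(repo_url: str, subdirectory: str, dest: str) -> str:
    """Clone only `subdirectory` of `repo_url` into `dest` (requires git >= 2.25)."""
    # Partial clone: skip blob downloads until needed, and start with a sparse worktree.
    subprocess.run(
        ["git", "clone", "--depth", "1", "--filter=blob:none", "--sparse", repo_url, dest],
        check=True,
    )
    # Restrict the checkout to the package's subdirectory, mirroring the extra run_cmd call above.
    subprocess.run(["git", "sparse-checkout", "set", subdirectory], cwd=dest, check=True)
    # Like the changed clone_and_checkout return value, hand back the package path inside the clone.
    return os.path.join(dest, subdirectory)


# Hypothetical usage:
# pkg_path = sparse_clone("https://github.com/org/monorepo.git", "dbt/my_package", "monorepo")
```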
```diff
@@ -29,7 +29,8 @@ from dbt.contracts.graph.compiled import CompiledSchemaTestNode
 from dbt.contracts.graph.parsed import ParsedSchemaTestNode
 from dbt.exceptions import (
     InternalException, raise_compiler_error, CompilationException,
-    invalid_materialization_argument, MacroReturn, JinjaRenderingException
+    invalid_materialization_argument, MacroReturn, JinjaRenderingException,
+    UndefinedMacroException
 )
 from dbt import flags
 from dbt.logger import GLOBAL_LOGGER as logger  # noqa
@@ -416,7 +417,6 @@ class TestExtension(jinja2.ext.Extension):
         test_name = parser.parse_assign_target(name_only=True).name

         parser.parse_signature(node)
-        node.defaults = []
         node.name = get_test_macro_name(test_name)
         node.body = parser.parse_statements(('name:endtest',),
                                             drop_needle=True)
@@ -519,7 +519,7 @@ def catch_jinja(node=None) -> Iterator[None]:
         e.translated = False
         raise CompilationException(str(e), node) from e
     except jinja2.exceptions.UndefinedError as e:
-        raise CompilationException(str(e), node) from e
+        raise UndefinedMacroException(str(e), node) from e
     except CompilationException as exc:
         exc.add_node(node)
         raise
@@ -663,39 +663,3 @@ def add_rendered_test_kwargs(

     kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
     context[SCHEMA_TEST_KWARGS_NAME] = kwargs
-
-
-def statically_extract_macro_calls(string, ctx):
-    # set 'capture_macros' to capture undefined
-    env = get_environment(None, capture_macros=True)
-    parsed = env.parse(string)
-
-    standard_calls = {
-        'source': [],
-        'ref': [],
-        'config': [],
-    }
-
-    possible_macro_calls = []
-    for func_call in parsed.find_all(jinja2.nodes.Call):
-        if hasattr(func_call, 'node') and hasattr(func_call.node, 'name'):
-            func_name = func_call.node.name
-        else:
-            # This is a kludge to capture an adapter.dispatch('<macro_name>') call.
-            # Call(node=Getattr(
-            #   node=Name(name='adapter', ctx='load'), attr='dispatch', ctx='load'),
-            #   args=[Const(value='get_snapshot_unique_id')], kwargs=[],
-            #   dyn_args=None, dyn_kwargs=None)
-            if (hasattr(func_call, 'node') and hasattr(func_call.node, 'attr') and
-                    func_call.node.attr == 'dispatch'):
-                func_name = func_call.args[0].value
-            else:
-                continue
-        if func_name in standard_calls:
-            continue
-        elif ctx.get(func_name):
-            continue
-        else:
-            possible_macro_calls.append(func_name)
-
-    return possible_macro_calls
```
core/dbt/clients/jinja_static.py (new file, 225 lines added):

```python
import jinja2
from dbt.clients.jinja import get_environment
from dbt.exceptions import raise_compiler_error


def statically_extract_macro_calls(string, ctx, db_wrapper=None):
    # set 'capture_macros' to capture undefined
    env = get_environment(None, capture_macros=True)
    parsed = env.parse(string)

    standard_calls = ['source', 'ref', 'config']
    possible_macro_calls = []
    for func_call in parsed.find_all(jinja2.nodes.Call):
        func_name = None
        if hasattr(func_call, 'node') and hasattr(func_call.node, 'name'):
            func_name = func_call.node.name
        else:
            # func_call for dbt_utils.current_timestamp macro
            # Call(
            #   node=Getattr(
            #     node=Name(
            #       name='dbt_utils',
            #       ctx='load'
            #     ),
            #     attr='current_timestamp',
            #     ctx='load
            #   ),
            #   args=[],
            #   kwargs=[],
            #   dyn_args=None,
            #   dyn_kwargs=None
            # )
            if (hasattr(func_call, 'node') and
                    hasattr(func_call.node, 'node') and
                    type(func_call.node.node).__name__ == 'Name' and
                    hasattr(func_call.node, 'attr')):
                package_name = func_call.node.node.name
                macro_name = func_call.node.attr
                if package_name == 'adapter':
                    if macro_name == 'dispatch':
                        ad_macro_calls = statically_parse_adapter_dispatch(
                            func_call, ctx, db_wrapper)
                        possible_macro_calls.extend(ad_macro_calls)
                    else:
                        # This skips calls such as adapter.parse_index
                        continue
                else:
                    func_name = f'{package_name}.{macro_name}'
            else:
                continue
        if not func_name:
            continue
        if func_name in standard_calls:
            continue
        elif ctx.get(func_name):
            continue
        else:
            if func_name not in possible_macro_calls:
                possible_macro_calls.append(func_name)

    return possible_macro_calls


# Call(
#   node=Getattr(
#     node=Name(
#       name='adapter',
#       ctx='load'
#     ),
#     attr='dispatch',
#     ctx='load'
#   ),
#   args=[
#     Const(value='test_pkg_and_dispatch')
#   ],
#   kwargs=[
#     Keyword(
#       key='packages',
#       value=Call(node=Getattr(node=Name(name='local_utils', ctx='load'),
#         attr='_get_utils_namespaces', ctx='load'), args=[], kwargs=[],
#         dyn_args=None, dyn_kwargs=None)
#     )
#   ],
#   dyn_args=None,
#   dyn_kwargs=None
# )
def statically_parse_adapter_dispatch(func_call, ctx, db_wrapper):
    possible_macro_calls = []
    # This captures an adapter.dispatch('<macro_name>') call.

    func_name = None
    # macro_name positional argument
    if len(func_call.args) > 0:
        func_name = func_call.args[0].value
    if func_name:
        possible_macro_calls.append(func_name)

    # packages positional argument
    packages = None
    macro_namespace = None
    packages_arg = None
    packages_arg_type = None

    if len(func_call.args) > 1:
        packages_arg = func_call.args[1]
        # This can be a List or a Call
        packages_arg_type = type(func_call.args[1]).__name__

    # keyword arguments
    if func_call.kwargs:
        for kwarg in func_call.kwargs:
            if kwarg.key == 'packages':
                # The packages keyword will be deprecated and
                # eventually removed
                packages_arg = kwarg.value
                # This can be a List or a Call
                packages_arg_type = type(kwarg.value).__name__
            elif kwarg.key == 'macro_name':
                # This will remain to enable static resolution
                if type(kwarg.value).__name__ == 'Const':
                    func_name = kwarg.value.value
                    possible_macro_calls.append(func_name)
                else:
                    raise_compiler_error(f"The macro_name parameter ({kwarg.value.value}) "
                                         "to adapter.dispatch was not a string")
            elif kwarg.key == 'macro_namespace':
                # This will remain to enable static resolution
                kwarg_type = type(kwarg.value).__name__
                if kwarg_type == 'Const':
                    macro_namespace = kwarg.value.value
                else:
                    raise_compiler_error("The macro_namespace parameter to adapter.dispatch "
                                         f"is a {kwarg_type}, not a string")

    # positional arguments
    if packages_arg:
        if packages_arg_type == 'List':
            # This will remain to enable static resolution
            packages = []
            for item in packages_arg.items:
                packages.append(item.value)
        elif packages_arg_type == 'Const':
            # This will remain to enable static resolution
            macro_namespace = packages_arg.value
        elif packages_arg_type == 'Call':
            # This is deprecated and should be removed eventually.
            # It is here to support (hackily) common ways of providing
            # a packages list to adapter.dispatch
            if (hasattr(packages_arg, 'node') and
                    hasattr(packages_arg.node, 'node') and
                    hasattr(packages_arg.node.node, 'name') and
                    hasattr(packages_arg.node, 'attr')):
                package_name = packages_arg.node.node.name
                macro_name = packages_arg.node.attr
                if (macro_name.startswith('_get') and 'namespaces' in macro_name):
                    # noqa: https://github.com/fishtown-analytics/dbt-utils/blob/9e9407b/macros/cross_db_utils/_get_utils_namespaces.sql
                    var_name = f'{package_name}_dispatch_list'
                    # hard code compatibility for fivetran_utils, just a teensy bit different
                    # noqa: https://github.com/fivetran/dbt_fivetran_utils/blob/0978ba2/macros/_get_utils_namespaces.sql
                    if package_name == 'fivetran_utils':
                        default_packages = ['dbt_utils', 'fivetran_utils']
                    else:
                        default_packages = [package_name]

                    namespace_names = get_dispatch_list(ctx, var_name, default_packages)
                    packages = []
                    if namespace_names:
                        packages.extend(namespace_names)
                else:
                    msg = (
                        f"As of v0.19.2, custom macros, such as '{macro_name}', are no longer "
                        "supported in the 'packages' argument of 'adapter.dispatch()'.\n"
                        f"See https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch "
                        "for details."
                    ).strip()
                    raise_compiler_error(msg)
        elif packages_arg_type == 'Add':
            # This logic is for when there is a variable and an addition of a list,
            # like: packages = (var('local_utils_dispatch_list', []) + ['local_utils2'])
            # This is deprecated and should be removed eventually.
            namespace_var = None
            default_namespaces = []
            # This might be a single call or it might be the 'left' piece in an addition
            for var_call in packages_arg.find_all(jinja2.nodes.Call):
                if (hasattr(var_call, 'node') and
                        var_call.node.name == 'var' and
                        hasattr(var_call, 'args')):
                    namespace_var = var_call.args[0].value
            if hasattr(packages_arg, 'right'):  # we have a default list of namespaces
                for item in packages_arg.right.items:
                    default_namespaces.append(item.value)
            if namespace_var:
                namespace_names = get_dispatch_list(ctx, namespace_var, default_namespaces)
                packages = []
                if namespace_names:
                    packages.extend(namespace_names)

    if db_wrapper:
        macro = db_wrapper.dispatch(
            func_name,
            packages=packages,
            macro_namespace=macro_namespace
        ).macro
        func_name = f'{macro.package_name}.{macro.name}'
        possible_macro_calls.append(func_name)
    else:  # this is only for test/unit/test_macro_calls.py
        if macro_namespace:
            packages = [macro_namespace]
        if packages is None:
            packages = []
        for package_name in packages:
            possible_macro_calls.append(f'{package_name}.{func_name}')

    return possible_macro_calls


def get_dispatch_list(ctx, var_name, default_packages):
    namespace_list = None
    try:
        # match the logic currently used in package _get_namespaces() macro
        namespace_list = ctx['var'](var_name) + default_packages
    except Exception:
        pass
    namespace_list = namespace_list if namespace_list else default_packages
    return namespace_list
```
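The new module above never renders the template; it walks the parsed Jinja AST and collects `Call` nodes that look like macro invocations. As a self-contained illustration of that core idea, here is a sketch that uses plain `jinja2` directly (not dbt's wrapped environment) on a made-up template string; the template contents and the helper name are examples only, and dbt's real extraction handles many more cases (keyword arguments, `packages` lists, deduplication).

```python
import jinja2

TEMPLATE = """
select {{ dbt_utils.current_timestamp() }} as loaded_at
from {{ ref('my_model') }}
where {{ adapter.dispatch('get_snapshot_unique_id')() }} is not null
"""


def candidate_macro_calls(source: str) -> list:
    env = jinja2.Environment()
    parsed = env.parse(source)  # parse only; undefined names are fine
    calls = []
    for node in parsed.find_all(jinja2.nodes.Call):
        target = node.node
        if isinstance(target, jinja2.nodes.Name):
            # bare call: ref(...), config(...), my_macro(...)
            calls.append(target.name)
        elif isinstance(target, jinja2.nodes.Getattr) and isinstance(target.node, jinja2.nodes.Name):
            if target.node.name == 'adapter' and target.attr == 'dispatch' and node.args:
                # adapter.dispatch('<macro_name>') -> record the dispatched macro name
                calls.append(node.args[0].value)
            else:
                # package-qualified call: dbt_utils.current_timestamp()
                calls.append(f'{target.node.name}.{target.attr}')
    return calls


print(candidate_macro_calls(TEMPLATE))
# e.g. ['dbt_utils.current_timestamp', 'ref', 'get_snapshot_unique_id']
```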
```diff
@@ -28,11 +28,10 @@ def _wrap_exceptions(fn):
         attempt += 1
         try:
             return fn(*args, **kwargs)
-        except requests.exceptions.ConnectionError as exc:
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as exc:
             if attempt < max_attempts:
                 time.sleep(1)
                 continue

             raise RegistryException(
                 'Unable to connect to registry hub'
             ) from exc
@@ -43,7 +42,7 @@ def _wrap_exceptions(fn):
 def _get(path, registry_base_url=None):
     url = _get_url(path, registry_base_url)
     logger.debug('Making package registry request: GET {}'.format(url))
-    resp = requests.get(url)
+    resp = requests.get(url, timeout=30)
     logger.debug('Response from registry: GET {} {}'.format(url,
                                                             resp.status_code))
     resp.raise_for_status()
```
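Taken together, the two registry changes above (a 30-second timeout on the GET, and retrying on `Timeout` as well as `ConnectionError`) give a request loop that cannot hang indefinitely. A minimal standalone sketch of that pattern follows, using only the stock `requests` and `time` libraries; it is not dbt's `_wrap_exceptions` decorator itself, and the example URL is just a placeholder.

```python
import time
import requests


def get_with_retries(url: str, max_attempts: int = 5, timeout: int = 30) -> dict:
    attempt = 0
    while True:
        attempt += 1
        try:
            # The timeout bounds connecting and waiting for a response,
            # so a hung registry endpoint raises instead of blocking forever.
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
            return resp.json()
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            if attempt < max_attempts:
                time.sleep(1)
                continue
            raise


# Hypothetical usage:
# index = get_with_retries("https://hub.getdbt.com/api/v1/index.json")
```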
```diff
@@ -416,6 +416,9 @@ def run_cmd(
         full_env.update(env)

     try:
+        exe_pth = shutil.which(cmd[0])
+        if exe_pth:
+            cmd = [os.path.abspath(exe_pth)] + list(cmd[1:])
         proc = subprocess.Popen(
             cmd,
             cwd=cwd,
```
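The `run_cmd` change resolves the executable with `shutil.which` before handing the command to `subprocess.Popen`, which is what lets Windows pick up `git.bat` as a git executable (see #3035 / #3134 in the changelog). A small isolated sketch of that resolution step, with a hypothetical wrapper function name:

```python
import os
import shutil
import subprocess


def resolve_and_run(cmd: list) -> subprocess.CompletedProcess:
    # shutil.which consults PATH (and PATHEXT on Windows), so 'git' can resolve
    # to git.exe, git.bat, or git.cmd instead of failing outright.
    exe_pth = shutil.which(cmd[0])
    if exe_pth:
        cmd = [os.path.abspath(exe_pth)] + list(cmd[1:])
    return subprocess.run(cmd, capture_output=True)


# Example: resolve_and_run(['git', '--version'])
```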
```diff
@@ -12,8 +12,8 @@ from dbt.clients.system import make_directory
 from dbt.context.providers import generate_runtime_model
 from dbt.contracts.graph.manifest import Manifest
 from dbt.contracts.graph.compiled import (
-    CompiledSchemaTestNode,
     COMPILED_TYPES,
+    CompiledSchemaTestNode,
     GraphMemberNode,
     InjectedCTE,
     ManifestNode,
@@ -182,8 +182,7 @@ class Compiler:

     def _get_relation_name(self, node: ParsedNode):
         relation_name = None
-        if (node.resource_type in NodeType.refable() and
-                not node.is_ephemeral_model):
+        if node.is_relational and not node.is_ephemeral_model:
             adapter = get_adapter(self.config)
             relation_cls = adapter.Relation
             relation_name = str(relation_cls.create_from(self.config, node))
@@ -250,19 +249,19 @@ class Compiler:

         return str(parsed)

-    # This method is called by the 'compile_node' method. Starting
-    # from the node that it is passed in, it will recursively call
-    # itself using the 'extra_ctes'. The 'ephemeral' models do
-    # not produce SQL that is executed directly, instead they
-    # are rolled up into the models that refer to them by
-    # inserting CTEs into the SQL.
     def _recursively_prepend_ctes(
         self,
         model: NonSourceCompiledNode,
         manifest: Manifest,
         extra_context: Optional[Dict[str, Any]],
     ) -> Tuple[NonSourceCompiledNode, List[InjectedCTE]]:
+        """This method is called by the 'compile_node' method. Starting
+        from the node that it is passed in, it will recursively call
+        itself using the 'extra_ctes'. The 'ephemeral' models do
+        not produce SQL that is executed directly, instead they
+        are rolled up into the models that refer to them by
+        inserting CTEs into the SQL.
+        """
         if model.compiled_sql is None:
             raise RuntimeException(
                 'Cannot inject ctes into an unparsed node', model
@@ -320,19 +319,19 @@ class Compiler:
             _extend_prepended_ctes(prepended_ctes, new_prepended_ctes)

             new_cte_name = self.add_ephemeral_prefix(cte_model.name)
-            sql = f' {new_cte_name} as (\n{cte_model.compiled_sql}\n)'
-
-            _add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
-
-        # We don't save injected_sql into compiled sql for ephemeral models
-        # because it will cause problems with processing of subsequent models.
-        # Ephemeral models do not produce executable SQL of their own.
-        if not model.is_ephemeral_model:
-            injected_sql = self._inject_ctes_into_sql(
-                model.compiled_sql,
-                prepended_ctes,
+            rendered_sql = (
+                cte_model._pre_injected_sql or cte_model.compiled_sql
             )
-            model.compiled_sql = injected_sql
+            sql = f' {new_cte_name} as (\n{rendered_sql}\n)'
+
+            _add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
+
+        injected_sql = self._inject_ctes_into_sql(
+            model.compiled_sql,
+            prepended_ctes,
+        )
+        model._pre_injected_sql = model.compiled_sql
+        model.compiled_sql = injected_sql
         model.extra_ctes_injected = True
         model.extra_ctes = prepended_ctes
         model.validate(model.to_dict(omit_none=True))
@@ -442,18 +441,13 @@ class Compiler:
         logger.debug(f'Writing injected SQL for node "{node.unique_id}"')

         if node.compiled_sql:
-            node.build_path = node.write_node(
+            node.compiled_path = node.write_node(
                 self.config.target_path,
                 'compiled',
                 node.compiled_sql
             )
         return node

-    # This is the main entry point into this code. It's called by
-    # CompileRunner.compile, GenericRPCRunner.compile, and
-    # RunTask.get_hook_sql. It calls '_compile_node' to convert
-    # the node into a compiled node, and then calls the
-    # recursive method to "prepend" the ctes.
     def compile_node(
         self,
         node: ManifestNode,
@@ -461,6 +455,12 @@ class Compiler:
         extra_context: Optional[Dict[str, Any]] = None,
         write: bool = True,
     ) -> NonSourceCompiledNode:
+        """This is the main entry point into this code. It's called by
+        CompileRunner.compile, GenericRPCRunner.compile, and
+        RunTask.get_hook_sql. It calls '_compile_node' to convert
+        the node into a compiled node, and then calls the
+        recursive method to "prepend" the ctes.
+        """
         node = self._compile_node(node, manifest, extra_context)

         node, _ = self._recursively_prepend_ctes(
```
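The `_recursively_prepend_ctes` changes above all revolve around one idea: an ephemeral model never runs on its own; its pre-injection SQL is wrapped as a named CTE and spliced into the SQL of the model that references it. The toy sketch below illustrates only that splice with hypothetical model SQL and a naive string concatenation; dbt's actual compiler tracks `InjectedCTE` objects, merges into existing `WITH` clauses, and handles the naming via `add_ephemeral_prefix`.

```python
def inject_ctes(model_sql: str, ephemeral_ctes: dict) -> str:
    """Prepend ephemeral model SQL as named CTEs in front of the referencing model's query."""
    if not ephemeral_ctes:
        return model_sql
    cte_list = ',\n'.join(
        f'{name} as (\n{sql}\n)' for name, sql in ephemeral_ctes.items()
    )
    # Simplification: assumes model_sql is a plain SELECT without a WITH clause of its own.
    return f'with {cte_list}\n{model_sql}'


compiled = inject_ctes(
    'select * from cte__my_ephemeral',
    {'cte__my_ephemeral': 'select 1 as id'},
)
# with cte__my_ephemeral as (
# select 1 as id
# )
# select * from cte__my_ephemeral
```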
```diff
@@ -349,6 +349,7 @@ class PartialProject(RenderComponents):
         if cfg.quoting is not None:
             quoting = cfg.quoting.to_dict(omit_none=True)

+        dispatch: List[Dict[str, Any]]
         models: Dict[str, Any]
         seeds: Dict[str, Any]
         snapshots: Dict[str, Any]
@@ -356,6 +357,7 @@ class PartialProject(RenderComponents):
         tests: Dict[str, Any]
         vars_value: VarProvider

+        dispatch = cfg.dispatch
         models = cfg.models
         seeds = cfg.seeds
         snapshots = cfg.snapshots
@@ -402,6 +404,7 @@ class PartialProject(RenderComponents):
             models=models,
             on_run_start=on_run_start,
             on_run_end=on_run_end,
+            dispatch=dispatch,
             seeds=seeds,
             snapshots=snapshots,
             dbt_version=dbt_version,
@@ -513,6 +516,7 @@ class Project:
     models: Dict[str, Any]
     on_run_start: List[str]
     on_run_end: List[str]
+    dispatch: List[Dict[str, Any]]
     seeds: Dict[str, Any]
     snapshots: Dict[str, Any]
     sources: Dict[str, Any]
@@ -572,6 +576,7 @@ class Project:
             'models': self.models,
             'on-run-start': self.on_run_start,
             'on-run-end': self.on_run_end,
+            'dispatch': self.dispatch,
             'seeds': self.seeds,
             'snapshots': self.snapshots,
             'sources': self.sources,
@@ -647,3 +652,9 @@ class Project:
                 f'{list(self.selectors)}'
             )
         return self.selectors[name]
+
+    def get_macro_search_order(self, macro_namespace: str):
+        for dispatch_entry in self.dispatch:
+            if dispatch_entry['macro_namespace'] == macro_namespace:
+                return dispatch_entry['search_order']
+        return None
```
@@ -102,6 +102,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
models=project.models,
|
||||
on_run_start=project.on_run_start,
|
||||
on_run_end=project.on_run_end,
|
||||
dispatch=project.dispatch,
|
||||
seeds=project.seeds,
|
||||
snapshots=project.snapshots,
|
||||
dbt_version=project.dbt_version,
|
||||
@@ -328,6 +329,17 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
if self.dependencies is None:
|
||||
all_projects = {self.project_name: self}
|
||||
internal_packages = get_include_paths(self.credentials.type)
|
||||
# raise exception if fewer installed packages than in packages.yml
|
||||
count_packages_specified = len(self.packages.packages) # type: ignore
|
||||
count_packages_installed = len(tuple(self._get_project_directories()))
|
||||
if count_packages_specified > count_packages_installed:
|
||||
raise_compiler_error(
|
||||
f'dbt found {count_packages_specified} package(s) '
|
||||
f'specified in packages.yml, but only '
|
||||
f'{count_packages_installed} package(s) installed '
|
||||
f'in {self.modules_path}. Run "dbt deps" to '
|
||||
f'install package dependencies.'
|
||||
)
|
||||
project_paths = itertools.chain(
|
||||
internal_packages,
|
||||
self._get_project_directories()
|
||||
@@ -482,6 +494,7 @@ class UnsetProfileConfig(RuntimeConfig):
|
||||
models=project.models,
|
||||
on_run_start=project.on_run_start,
|
||||
on_run_end=project.on_run_end,
|
||||
dispatch=project.dispatch,
|
||||
seeds=project.seeds,
|
||||
snapshots=project.snapshots,
|
||||
dbt_version=project.dbt_version,
|
||||
|
||||
@@ -75,8 +75,26 @@ class SchemaYamlContext(ConfiguredContext):
)


class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
super().__init__(config)

@contextproperty
def var(self) -> ConfiguredVar:
return ConfiguredVar(
self._ctx, self.config, self.config.project_name
)


def generate_schema_yml(
config: AdapterRequiredConfig, project_name: str
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name)
return ctx.to_dict()


def generate_macro_context(
config: AdapterRequiredConfig,
) -> Dict[str, Any]:
ctx = MacroResolvingContext(config)
return ctx.to_dict()

@@ -57,14 +57,19 @@ class DocsRuntimeContext(SchemaYamlContext):
else:
doc_invalid_args(self.node, args)

# ParsedDocumentation
target_doc = self.manifest.resolve_doc(
doc_name,
doc_package_name,
self._project_name,
self.node.package_name,
)

if target_doc is None:
if target_doc:
file_id = target_doc.file_id
if file_id in self.manifest.files:
source_file = self.manifest.files[file_id]
source_file.add_node(self.node.unique_id)
else:
doc_target_not_found(self.node, doc_name, doc_package_name)

return target_doc.block_contents
@@ -145,17 +145,35 @@ class TestMacroNamespace:
):
self.macro_resolver = macro_resolver
self.ctx = ctx
self.node = node
self.node = node # can be none
self.thread_ctx = thread_ctx
local_namespace = {}
self.local_namespace = {}
self.project_namespace = {}
if depends_on_macros:
for macro_unique_id in depends_on_macros:
dep_macros = []
self.recursively_get_depends_on_macros(depends_on_macros, dep_macros)
for macro_unique_id in dep_macros:
if macro_unique_id in self.macro_resolver.macros:
# Split up the macro unique_id to get the project_name
(_, project_name, macro_name) = macro_unique_id.split('.')
# Save the plain macro_name in the local_namespace
macro = self.macro_resolver.macros[macro_unique_id]
local_namespace[macro.name] = MacroGenerator(
macro_gen = MacroGenerator(
macro, self.ctx, self.node, self.thread_ctx,
)
self.local_namespace = local_namespace
self.local_namespace[macro_name] = macro_gen
# We also need the two part macro name
if project_name not in self.project_namespace:
self.project_namespace[project_name] = {}
self.project_namespace[project_name][macro_name] = macro_gen

def recursively_get_depends_on_macros(self, depends_on_macros, dep_macros):
for macro_unique_id in depends_on_macros:
dep_macros.append(macro_unique_id)
if macro_unique_id in self.macro_resolver.macros:
macro = self.macro_resolver.macros[macro_unique_id]
if macro.depends_on.macros:
self.recursively_get_depends_on_macros(macro.depends_on.macros, dep_macros)

def get_from_package(
self, package_name: Optional[str], name: str

@@ -62,6 +62,7 @@ class ManifestContext(ConfiguredContext):
# keys in the manifest dictionary
if isinstance(self.namespace, TestMacroNamespace):
dct.update(self.namespace.local_namespace)
dct.update(self.namespace.project_namespace)
else:
dct.update(self.namespace)
return dct
@@ -8,7 +8,9 @@ from typing_extensions import Protocol

from dbt import deprecations
from dbt.adapters.base.column import Column
from dbt.adapters.factory import get_adapter, get_adapter_package_names
from dbt.adapters.factory import (
get_adapter, get_adapter_package_names, get_adapter_type_names
)
from dbt.clients import agate_helper
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack
from dbt.config import RuntimeConfig, Project
@@ -107,14 +109,18 @@ class BaseDatabaseWrapper:
return self._adapter.commit_if_has_connection()

def _get_adapter_macro_prefixes(self) -> List[str]:
# a future version of this could have plugins automatically call fall
# back to their dependencies' dependencies by using
# `get_adapter_type_names` instead of `[self.config.credentials.type]`
search_prefixes = [self._adapter.type(), 'default']
# order matters for dispatch:
# 1. current adapter
# 2. any parent adapters (dependencies)
# 3. 'default'
search_prefixes = get_adapter_type_names(self._adapter.type()) + ['default']
return search_prefixes

def dispatch(
self, macro_name: str, packages: Optional[List[str]] = None
self,
macro_name: str,
macro_namespace: Optional[str] = None,
packages: Optional[List[str]] = None,
) -> MacroGenerator:
search_packages: List[Optional[str]]

@@ -128,15 +134,25 @@ class BaseDatabaseWrapper:
)
raise CompilationException(msg)

if packages is None:
if packages is not None:
deprecations.warn('dispatch-packages', macro_name=macro_name)

namespace = packages if packages else macro_namespace

if namespace is None:
search_packages = [None]
elif isinstance(packages, str):
raise CompilationException(
f'In adapter.dispatch, got a string packages argument '
f'("{packages}"), but packages should be None or a list.'
)
elif isinstance(namespace, str):
search_packages = self._adapter.config.get_macro_search_order(namespace)
if not search_packages and namespace in self._adapter.config.dependencies:
search_packages = [namespace]
if not search_packages:
raise CompilationException(
f'In adapter.dispatch, got a string packages argument '
f'("{packages}"), but packages should be None or a list.'
)
else:
search_packages = packages
# Not a string and not None so must be a list
search_packages = namespace

attempts = []

@@ -1179,14 +1195,13 @@ class ProviderContext(ManifestContext):
"""
deprecations.warn('adapter-macro', macro_name=name)
original_name = name
package_names: Optional[List[str]] = None
package_name = None
if '.' in name:
package_name, name = name.split('.', 1)
package_names = [package_name]

try:
macro = self.db_wrapper.dispatch(
macro_name=name, packages=package_names
macro_name=name, macro_namespace=package_name
)
except CompilationException as exc:
raise CompilationException(
@@ -1,13 +1,12 @@
import hashlib
import os
from dataclasses import dataclass, field
from typing import List, Optional, Union
from mashumaro.types import SerializableType
from typing import List, Optional, Union, Dict, Any

from dbt.dataclass_schema import dbtClassMixin, StrEnum

from dbt.exceptions import InternalException

from .util import MacroKey, SourceKey
from .util import SourceKey


MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
@@ -23,7 +22,20 @@ class ParseFileType(StrEnum):
Seed = 'seed'
Documentation = 'docs'
Schema = 'schema'
Hook = 'hook'
Hook = 'hook' # not a real filetype, from dbt_project.yml


parse_file_type_to_parser = {
ParseFileType.Macro: 'MacroParser',
ParseFileType.Model: 'ModelParser',
ParseFileType.Snapshot: 'SnapshotParser',
ParseFileType.Analysis: 'AnalysisParser',
ParseFileType.Test: 'DataTestParser',
ParseFileType.Seed: 'SeedParser',
ParseFileType.Documentation: 'DocumentationParser',
ParseFileType.Schema: 'SchemaParser',
ParseFileType.Hook: 'HookParser',
}


@dataclass
@@ -122,7 +134,7 @@ class RemoteFile(dbtClassMixin):


@dataclass
class SourceFile(dbtClassMixin):
class BaseSourceFile(dbtClassMixin, SerializableType):
"""Define a source file in dbt"""
path: Union[FilePath, RemoteFile] # the path information
checksum: FileHash
@@ -131,43 +143,57 @@ class SourceFile(dbtClassMixin):
# Parse file type: i.e. which parser will process this file
parse_file_type: Optional[ParseFileType] = None
# we don't want to serialize this
_contents: Optional[str] = None
contents: Optional[str] = None
# the unique IDs contained in this file
nodes: List[str] = field(default_factory=list)
docs: List[str] = field(default_factory=list)
macros: List[str] = field(default_factory=list)
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
# any node patches in this file. The entries are names, not unique ids!
patches: List[str] = field(default_factory=list)
# any macro patches in this file. The entries are package, name pairs.
macro_patches: List[MacroKey] = field(default_factory=list)
# any source patches in this file. The entries are package, name pairs
source_patches: List[SourceKey] = field(default_factory=list)

@property
def search_key(self) -> Optional[str]:
def file_id(self):
if isinstance(self.path, RemoteFile):
return None
if self.checksum.name == 'none':
return None
return self.path.search_key
return f'{self.project_name}://{self.path.original_file_path}'

@property
def contents(self) -> str:
if self._contents is None:
raise InternalException('SourceFile has no contents!')
return self._contents

@contents.setter
def contents(self, value):
self._contents = value
def _serialize(self):
dct = self.to_dict()
if 'pp_files' in dct:
del dct['pp_files']
if 'pp_test_index' in dct:
del dct['pp_test_index']
return dct

@classmethod
def empty(cls, path: FilePath) -> 'SourceFile':
self = cls(path=path, checksum=FileHash.empty())
self.contents = ''
return self
def _deserialize(cls, dct: Dict[str, int]):
if dct['parse_file_type'] == 'schema':
# TODO: why are these keys even here
if 'pp_files' in dct:
del dct['pp_files']
if 'pp_test_index' in dct:
del dct['pp_test_index']
sf = SchemaSourceFile.from_dict(dct)
else:
sf = SourceFile.from_dict(dct)
return sf

def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
# remove empty lists to save space
dct_keys = list(dct.keys())
for key in dct_keys:
if isinstance(dct[key], list) and not dct[key]:
del dct[key]
# remove contents. Schema files will still have 'dict_from_yaml'
# from the contents
if 'contents' in dct:
del dct['contents']
return dct


@dataclass
class SourceFile(BaseSourceFile):
nodes: List[str] = field(default_factory=list)
docs: List[str] = field(default_factory=list)
macros: List[str] = field(default_factory=list)

@classmethod
def big_seed(cls, path: FilePath) -> 'SourceFile':
@@ -176,8 +202,67 @@ class SourceFile(dbtClassMixin):
self.contents = ''
return self

def add_node(self, value):
if value not in self.nodes:
self.nodes.append(value)

# TODO: do this a different way. This remote file kludge isn't going
# to work long term
@classmethod
def remote(cls, contents: str) -> 'SourceFile':
self = cls(path=RemoteFile(), checksum=FileHash.empty())
self.contents = contents
def remote(cls, contents: str, project_name: str) -> 'SourceFile':
self = cls(
path=RemoteFile(),
checksum=FileHash.from_contents(contents),
project_name=project_name,
contents=contents,
)
return self


@dataclass
class SchemaSourceFile(BaseSourceFile):
dfy: Dict[str, Any] = field(default_factory=dict)
# these are in the manifest.nodes dictionary
tests: List[str] = field(default_factory=list)
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.
mcp: List[str] = field(default_factory=list)
# any source patches in this file. The entries are package, name pairs
# Patches are only against external sources. Sources can be
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None

@property
def dict_from_yaml(self):
return self.dfy

@property
def node_patches(self):
return self.ndp

@property
def macro_patches(self):
return self.mcp

@property
def source_patches(self):
return self.sop

def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if 'pp_files' in dct:
del dct['pp_files']
if 'pp_test_index' in dct:
del dct['pp_test_index']
return dct

def append_patch(self, yaml_key, unique_id):
self.node_patches.append(unique_id)


AnySourceFile = Union[SchemaSourceFile, SourceFile]

@@ -43,6 +43,7 @@ class CompiledNode(ParsedNode, CompiledNodeMixin):
extra_ctes_injected: bool = False
extra_ctes: List[InjectedCTE] = field(default_factory=list)
relation_name: Optional[str] = None
_pre_injected_sql: Optional[str] = None

def set_cte(self, cte_id: str, sql: str):
"""This is the equivalent of what self.extra_ctes[cte_id] = sql would
@@ -55,6 +56,12 @@ class CompiledNode(ParsedNode, CompiledNodeMixin):
else:
self.extra_ctes.append(InjectedCTE(id=cte_id, sql=sql))

def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if '_pre_injected_sql' in dct:
del dct['_pre_injected_sql']
return dct


@dataclass
class CompiledAnalysisNode(CompiledNode):
@@ -112,15 +119,6 @@ class CompiledSchemaTestNode(CompiledNode, HasTestMetadata):
column_name: Optional[str] = None
config: TestConfig = field(default_factory=TestConfig)

def same_config(self, other) -> bool:
return (
self.unrendered_config.get('severity') ==
other.unrendered_config.get('severity')
)

def same_column_name(self, other) -> bool:
return self.column_name == other.column_name

def same_contents(self, other) -> bool:
if other is None:
return False
@@ -1,11 +1,11 @@
import abc
import enum
from dataclasses import dataclass, field
from itertools import chain, islice
from mashumaro import DataClassMessagePackMixin
from multiprocessing.synchronize import Lock
from typing import (
Dict, List, Optional, Union, Mapping, MutableMapping, Any, Set, Tuple,
TypeVar, Callable, Iterable, Generic, cast, AbstractSet
TypeVar, Callable, Iterable, Generic, cast, AbstractSet, ClassVar
)
from typing_extensions import Protocol
from uuid import UUID
@@ -19,22 +19,21 @@ from dbt.contracts.graph.parsed import (
UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, FileHash, RemoteFile
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.util import (
BaseArtifactMetadata, MacroKey, SourceKey, ArtifactMixin, schema_version
BaseArtifactMetadata, SourceKey, ArtifactMixin, schema_version
)
from dbt.dataclass_schema import dbtClassMixin
from dbt.exceptions import (
InternalException, CompilationException,
CompilationException,
raise_duplicate_resource_name, raise_compiler_error, warn_or_error,
raise_invalid_patch, raise_duplicate_patch_name,
raise_duplicate_patch_name,
raise_duplicate_macro_patch_name, raise_duplicate_source_patch_name,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import deprecations
from dbt import flags
from dbt import tracking
import dbt.utils
@@ -46,72 +45,70 @@ RefName = str
UniqueID = str


K_T = TypeVar('K_T')
V_T = TypeVar('V_T')


class PackageAwareCache(Generic[K_T, V_T]):
def __init__(self, manifest: 'Manifest'):
self.storage: Dict[K_T, Dict[PackageName, UniqueID]] = {}
self._manifest = manifest
self.populate()

@abc.abstractmethod
def populate(self):
pass

@abc.abstractmethod
def perform_lookup(self, unique_id: UniqueID) -> V_T:
pass

def find_cached_value(
self, key: K_T, package: Optional[PackageName]
) -> Optional[V_T]:
unique_id = self.find_unique_id_for_package(key, package)
if unique_id is not None:
return self.perform_lookup(unique_id)
def find_unique_id_for_package(storage, key, package: Optional[PackageName]):
if key not in storage:
return None

def find_unique_id_for_package(
self, key: K_T, package: Optional[PackageName]
) -> Optional[UniqueID]:
if key not in self.storage:
pkg_dct: Mapping[PackageName, UniqueID] = storage[key]

if package is None:
if not pkg_dct:
return None

pkg_dct: Mapping[PackageName, UniqueID] = self.storage[key]

if package is None:
if not pkg_dct:
return None
else:
return next(iter(pkg_dct.values()))
elif package in pkg_dct:
return pkg_dct[package]
else:
return None
return next(iter(pkg_dct.values()))
elif package in pkg_dct:
return pkg_dct[package]
else:
return None


class DocCache(PackageAwareCache[DocName, ParsedDocumentation]):
class DocLookup(dbtClassMixin):
def __init__(self, manifest: 'Manifest'):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, key, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, key, package)

def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
unique_id = self.get_unique_id(key, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add_doc(self, doc: ParsedDocumentation):
if doc.name not in self.storage:
self.storage[doc.name] = {}
self.storage[doc.name][doc.package_name] = doc.unique_id

def populate(self):
for doc in self._manifest.docs.values():
def populate(self, manifest):
for doc in manifest.docs.values():
self.add_doc(doc)

def perform_lookup(
self, unique_id: UniqueID
self, unique_id: UniqueID, manifest
) -> ParsedDocumentation:
if unique_id not in self._manifest.docs:
if unique_id not in manifest.docs:
raise dbt.exceptions.InternalException(
f'Doc {unique_id} found in cache but not found in manifest'
)
return self._manifest.docs[unique_id]
return manifest.docs[unique_id]


class SourceCache(PackageAwareCache[SourceKey, ParsedSourceDefinition]):
class SourceLookup(dbtClassMixin):
def __init__(self, manifest: 'Manifest'):
self.storage: Dict[Tuple[str, str], Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, key, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, key, package)

def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
unique_id = self.get_unique_id(key, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add_source(self, source: ParsedSourceDefinition):
key = (source.source_name, source.name)
if key not in self.storage:
@@ -119,47 +116,63 @@ class SourceCache(PackageAwareCache[SourceKey, ParsedSourceDefinition]):

self.storage[key][source.package_name] = source.unique_id

def populate(self):
for source in self._manifest.sources.values():
def populate(self, manifest):
for source in manifest.sources.values():
if hasattr(source, 'source_name'):
self.add_source(source)

def perform_lookup(
self, unique_id: UniqueID
self, unique_id: UniqueID, manifest: 'Manifest'
) -> ParsedSourceDefinition:
if unique_id not in self._manifest.sources:
if unique_id not in manifest.sources:
raise dbt.exceptions.InternalException(
f'Source {unique_id} found in cache but not found in manifest'
)
return self._manifest.sources[unique_id]
return manifest.sources[unique_id]


class RefableCache(PackageAwareCache[RefName, ManifestNode]):
class RefableLookup(dbtClassMixin):
# model, seed, snapshot
_lookup_types: ClassVar[set] = set(NodeType.refable())

# refables are actually unique, so the Dict[PackageName, UniqueID] will
# only ever have exactly one value, but doing 3 dict lookups instead of 1
# is not a big deal at all and retains consistency
def __init__(self, manifest: 'Manifest'):
self._cached_types = set(NodeType.refable())
super().__init__(manifest)
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)

def get_unique_id(self, key, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, key, package)

def find(self, key, package: Optional[PackageName], manifest: 'Manifest'):
unique_id = self.get_unique_id(key, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None

def add_node(self, node: ManifestNode):
if node.resource_type in self._cached_types:
if node.resource_type in self._lookup_types:
if node.name not in self.storage:
self.storage[node.name] = {}
self.storage[node.name][node.package_name] = node.unique_id

def populate(self):
for node in self._manifest.nodes.values():
def populate(self, manifest):
for node in manifest.nodes.values():
self.add_node(node)

def perform_lookup(
self, unique_id: UniqueID
self, unique_id: UniqueID, manifest
) -> ManifestNode:
if unique_id not in self._manifest.nodes:
if unique_id not in manifest.nodes:
raise dbt.exceptions.InternalException(
f'Node {unique_id} found in cache but not found in manifest'
)
return self._manifest.nodes[unique_id]
return manifest.nodes[unique_id]


class AnalysisLookup(RefableLookup):
_lookup_types: ClassVar[set] = set(NodeType.Analysis)


def _search_packages(
@@ -514,39 +527,55 @@ class MacroMethods:

@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash
profile_hash: FileHash
project_hashes: MutableMapping[str, FileHash]
vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_hash: FileHash = field(default_factory=FileHash.empty)
project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)


@dataclass
class Manifest(MacroMethods):
class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
"""The manifest for the full graph, after parsing and during compilation.
"""
# These attributes are both positional and by keyword. If an attribute
# is added it must all be added in the __reduce_ex__ method in the
# args tuple in the right position.
nodes: MutableMapping[str, ManifestNode]
sources: MutableMapping[str, ParsedSourceDefinition]
macros: MutableMapping[str, ParsedMacro]
docs: MutableMapping[str, ParsedDocumentation]
exposures: MutableMapping[str, ParsedExposure]
selectors: MutableMapping[str, Any]
disabled: List[CompileResultNode]
files: MutableMapping[str, SourceFile]
nodes: MutableMapping[str, ManifestNode] = field(default_factory=dict)
sources: MutableMapping[str, ParsedSourceDefinition] = field(default_factory=dict)
macros: MutableMapping[str, ParsedMacro] = field(default_factory=dict)
docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
disabled: List[CompileResultNode] = field(default_factory=list)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
flat_graph: Dict[str, Any] = field(default_factory=dict)
state_check: Optional[ManifestStateCheck] = None
state_check: ManifestStateCheck = field(default_factory=ManifestStateCheck)
# Moved from the ParseResult object
macro_patches: MutableMapping[MacroKey, ParsedMacroPatch] = field(default_factory=dict)
patches: MutableMapping[str, ParsedNodePatch] = field(default_factory=dict)
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
# following is from ParseResult
_disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
_docs_cache: Optional[DocCache] = None
_sources_cache: Optional[SourceCache] = None
_refs_cache: Optional[RefableCache] = None
_lock: Lock = field(default_factory=flags.MP_CONTEXT.Lock)
_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
)
_source_lookup: Optional[SourceLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
)
_ref_lookup: Optional[RefableLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
)
_analysis_lookup: Optional[AnalysisLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
)
_lock: Lock = field(
default_factory=flags.MP_CONTEXT.Lock,
metadata={'serialize': lambda x: None, 'deserialize': lambda x: flags.MP_CONTEXT.Lock}
)

def __pre_serialize__(self):
# serialization won't work with anything except an empty source_patches because
# tuple keys are not supported, so ensure it's empty
self.source_patches = {}
return self

def sync_update_node(
self, new_node: NonSourceCompiledNode
@@ -662,102 +691,59 @@ class Manifest(MacroMethods):
resource_fqns[resource_type_plural].add(tuple(resource.fqn))
return resource_fqns

def add_nodes(self, new_nodes: Mapping[str, ManifestNode]):
"""Add the given dict of new nodes to the manifest."""
for unique_id, node in new_nodes.items():
if unique_id in self.nodes:
raise_duplicate_resource_name(node, self.nodes[unique_id])
self.nodes[unique_id] = node
# fixup the cache if it exists.
if self._refs_cache is not None:
if node.resource_type in NodeType.refable():
self._refs_cache.add_node(node)

# This is called by 'parse_patch' in the NodePatchParser
def add_patch(
self, source_file: SourceFile, patch: ParsedNodePatch,
self, source_file: SchemaSourceFile, patch: ParsedNodePatch,
) -> None:
if patch.yaml_key in ['models', 'seeds', 'snapshots']:
unique_id = self.ref_lookup.get_unique_id(patch.name, None)
elif patch.yaml_key == 'analyses':
unique_id = self.analysis_lookup.get_unique_id(patch.name, None)
else:
raise dbt.exceptions.InternalException(
f'Unexpected yaml_key {patch.yaml_key} for patch in '
f'file {source_file.path.original_file_path}'
)
if unique_id is None:
# This will usually happen when a node is disabled
return

# patches can't be overwritten
if patch.name in self.patches:
raise_duplicate_patch_name(patch, self.patches[patch.name])
self.patches[patch.name] = patch
self.get_file(source_file).patches.append(patch.name)
node = self.nodes.get(unique_id)
if node:
if node.patch_path:
package_name, existing_file_path = node.patch_path.split('://')
raise_duplicate_patch_name(patch, existing_file_path)
source_file.append_patch(patch.yaml_key, unique_id)
node.patch(patch)

def add_macro_patch(
self, source_file: SourceFile, patch: ParsedMacroPatch,
self, source_file: SchemaSourceFile, patch: ParsedMacroPatch,
) -> None:
# macros are fully namespaced
key = (patch.package_name, patch.name)
if key in self.macro_patches:
raise_duplicate_macro_patch_name(patch, self.macro_patches[key])
self.macro_patches[key] = patch
self.get_file(source_file).macro_patches.append(key)
unique_id = f'macro.{patch.package_name}.{patch.name}'
macro = self.macros.get(unique_id)
if not macro:
warn_or_error(
f'WARNING: Found documentation for macro "{patch.name}" '
f'which was not found'
)
return
if macro.patch_path:
package_name, existing_file_path = macro.patch_path.split('://')
raise_duplicate_macro_patch_name(patch, existing_file_path)
source_file.macro_patches.append(unique_id)
macro.patch(patch)

def add_source_patch(
self, source_file: SourceFile, patch: SourcePatch,
self, source_file: SchemaSourceFile, patch: SourcePatch,
) -> None:
# source patches must be unique
key = (patch.overrides, patch.name)
if key in self.source_patches:
raise_duplicate_source_patch_name(patch, self.source_patches[key])
self.source_patches[key] = patch
self.get_file(source_file).source_patches.append(key)

def patch_macros(self) -> None:
for macro in self.macros.values():
key = (macro.package_name, macro.name)
patch = self.macro_patches.pop(key, None)
if not patch:
continue
macro.patch(patch)

if self.macro_patches:
for patch in self.macro_patches.values():
warn_or_error(
f'WARNING: Found documentation for macro "{patch.name}" '
f'which was not found'
)

def patch_nodes(self) -> None:
"""Patch nodes with the given dict of patches. Note that this consumes
the input!
This relies on the fact that all nodes have unique _name_ fields, not
just unique unique_id fields.
"""
# because we don't have any mapping from node _names_ to nodes, and we
# only have the node name in the patch, we have to iterate over all the
# nodes looking for matching names. We could use a NameSearcher if we
# were ok with doing an O(n*m) search (one nodes scan per patch)
# Q: could we save patches by node unique_ids instead, or convert
# between names and node ids?
for node in self.nodes.values():
patch = self.patches.pop(node.name, None)
if not patch:
continue

expected_key = node.resource_type.pluralize()
if expected_key != patch.yaml_key:
if patch.yaml_key == 'models':
deprecations.warn(
'models-key-mismatch',
patch=patch, node=node, expected_key=expected_key
)
else:
raise_invalid_patch(
node, patch.yaml_key, patch.original_file_path
)

node.patch(patch)

# If anything is left in self.patches, it means that the node for
# that patch wasn't found.
if self.patches:
for patch in self.patches.values():
# since patches aren't nodes, we can't use the existing
# target_not_found warning
logger.debug((
'WARNING: Found documentation for resource "{}" which was '
'not found or is disabled').format(patch.name)
)
source_file.source_patches.append(key)

def get_used_schemas(self, resource_types=None):
return frozenset({
@@ -787,14 +773,18 @@ class Manifest(MacroMethods):
state_check=_deepcopy(self.state_check),
)

def writable_manifest(self):
def build_parent_and_child_maps(self):
edge_members = list(chain(
self.nodes.values(),
self.sources.values(),
self.exposures.values(),
))
forward_edges, backward_edges = build_edges(edge_members)
self.child_map = forward_edges
self.parent_map = backward_edges

def writable_manifest(self):
self.build_parent_and_child_maps()
return WritableManifest(
nodes=self.nodes,
sources=self.sources,
@@ -804,18 +794,15 @@ class Manifest(MacroMethods):
selectors=self.selectors,
metadata=self.metadata,
disabled=self.disabled,
child_map=forward_edges,
parent_map=backward_edges,
child_map=self.child_map,
parent_map=self.parent_map,
)

# When 'to_dict' is called on the Manifest, it substitues a
# WritableManifest
def __pre_serialize__(self):
return self.writable_manifest()

def write(self, path):
self.writable_manifest().write(path)

# Called in dbt.compilation.Linker.write_graph and
# dbt.graph.queue.get and ._include_in_cost
def expect(self, unique_id: str) -> GraphMemberNode:
if unique_id in self.nodes:
return self.nodes[unique_id]
@@ -830,29 +817,40 @@ class Manifest(MacroMethods):
)

@property
def docs_cache(self) -> DocCache:
if self._docs_cache is not None:
return self._docs_cache
cache = DocCache(self)
self._docs_cache = cache
return cache
def doc_lookup(self) -> DocLookup:
if self._doc_lookup is None:
self._doc_lookup = DocLookup(self)
return self._doc_lookup

def rebuild_doc_lookup(self):
self._doc_lookup = DocLookup(self)

@property
def source_cache(self) -> SourceCache:
if self._sources_cache is not None:
return self._sources_cache
cache = SourceCache(self)
self._sources_cache = cache
return cache
def source_lookup(self) -> SourceLookup:
if self._source_lookup is None:
self._source_lookup = SourceLookup(self)
return self._source_lookup

def rebuild_source_lookup(self):
self._source_lookup = SourceLookup(self)

@property
def refs_cache(self) -> RefableCache:
if self._refs_cache is not None:
return self._refs_cache
cache = RefableCache(self)
self._refs_cache = cache
return cache
def ref_lookup(self) -> RefableLookup:
if self._ref_lookup is None:
self._ref_lookup = RefableLookup(self)
return self._ref_lookup

def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)

@property
def analysis_lookup(self) -> AnalysisLookup:
if self._analysis_lookup is None:
self._analysis_lookup = AnalysisLookup(self)
return self._analysis_lookup

# Called by dbt.parser.manifest._resolve_refs_for_exposure
# and dbt.parser.manifest._process_refs_for_node
def resolve_ref(
self,
target_model_name: str,
@@ -868,7 +866,7 @@ class Manifest(MacroMethods):
current_project, node_package, target_model_package
)
for pkg in candidates:
node = self.refs_cache.find_cached_value(target_model_name, pkg)
node = self.ref_lookup.find(target_model_name, pkg, self)

if node is not None and node.config.enabled:
return node
@@ -883,6 +881,8 @@ class Manifest(MacroMethods):
return Disabled(disabled)
return None

# Called by dbt.parser.manifest._resolve_sources_for_exposure
# and dbt.parser.manifest._process_source_for_node
def resolve_source(
self,
target_source_name: str,
@@ -897,7 +897,7 @@ class Manifest(MacroMethods):
disabled: Optional[ParsedSourceDefinition] = None

for pkg in candidates:
source = self.source_cache.find_cached_value(key, pkg)
source = self.source_lookup.find(key, pkg, self)
if source is not None and source.config.enabled:
return source

@@ -910,6 +910,7 @@ class Manifest(MacroMethods):
return Disabled(disabled)
return None

# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
name: str,
@@ -926,11 +927,12 @@ class Manifest(MacroMethods):
)

for pkg in candidates:
result = self.docs_cache.find_cached_value(name, pkg)
result = self.doc_lookup.find(name, pkg, self)
if result is not None:
return result
return None

# Called by RunTask.defer_to_manifest
def merge_from_artifact(
self,
adapter,
@@ -964,13 +966,6 @@ class Manifest(MacroMethods):
)

# Methods that were formerly in ParseResult
def get_file(self, source_file: SourceFile) -> SourceFile:
key = source_file.search_key
if key is None:
return source_file
if key not in self.files:
self.files[key] = source_file
return self.files[key]

def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
if macro.unique_id in self.macros:
@@ -997,10 +992,10 @@ class Manifest(MacroMethods):
raise_compiler_error(msg)

self.macros[macro.unique_id] = macro
self.get_file(source_file).macros.append(macro.unique_id)
source_file.macros.append(macro.unique_id)

def has_file(self, source_file: SourceFile) -> bool:
key = source_file.search_key
key = source_file.file_id
if key is None:
return False
if key not in self.files:
@@ -1009,26 +1004,29 @@ class Manifest(MacroMethods):
return my_checksum == source_file.checksum

def add_source(
self, source_file: SourceFile, source: UnpatchedSourceDefinition
self, source_file: SchemaSourceFile, source: UnpatchedSourceDefinition
):
# sources can't be overwritten!
_check_duplicates(source, self.sources)
self.sources[source.unique_id] = source # type: ignore
self.get_file(source_file).sources.append(source.unique_id)
source_file.sources.append(source.unique_id)

def add_node_nofile(self, node: ManifestNodes):
# nodes can't be overwritten!
_check_duplicates(node, self.nodes)
self.nodes[node.unique_id] = node

def add_node(self, source_file: SourceFile, node: ManifestNodes):
def add_node(self, source_file: AnySourceFile, node: ManifestNodes):
self.add_node_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
if isinstance(source_file, SchemaSourceFile):
source_file.tests.append(node.unique_id)
else:
source_file.nodes.append(node.unique_id)

def add_exposure(self, source_file: SourceFile, exposure: ParsedExposure):
def add_exposure(self, source_file: SchemaSourceFile, exposure: ParsedExposure):
_check_duplicates(exposure, self.exposures)
self.exposures[exposure.unique_id] = exposure
self.get_file(source_file).exposures.append(exposure.unique_id)
source_file.exposures.append(exposure.unique_id)

def add_disabled_nofile(self, node: CompileResultNode):
if node.unique_id in self._disabled:
@@ -1036,137 +1034,18 @@ class Manifest(MacroMethods):
else:
self._disabled[node.unique_id] = [node]

def add_disabled(self, source_file: SourceFile, node: CompileResultNode):
def add_disabled(self, source_file: AnySourceFile, node: CompileResultNode):
self.add_disabled_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
if isinstance(source_file, SchemaSourceFile):
source_file.tests.append(node.unique_id)
else:
source_file.nodes.append(node.unique_id)

def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
_check_duplicates(doc, self.docs)
self.docs[doc.unique_id] = doc
self.get_file(source_file).docs.append(doc.unique_id)
source_file.docs.append(doc.unique_id)

def _get_disabled(
self,
unique_id: str,
match_file: SourceFile,
) -> List[CompileResultNode]:
if unique_id not in self._disabled:
raise InternalException(
'called _get_disabled with id={}, but it does not exist'
.format(unique_id)
)
return [
n for n in self._disabled[unique_id]
if n.original_file_path == match_file.path.original_file_path
]

# This is only used by 'sanitized_update' which processes "old_manifest"
def _process_node(
self,
node_id: str,
source_file: SourceFile,
old_file: SourceFile,
old_manifest: Any,
) -> None:
"""Nodes are a special kind of complicated - there can be multiple
with the same name, as long as all but one are disabled.

Only handle nodes where the matching node has the same resource type
as the current parser.
"""
source_path = source_file.path.original_file_path
found: bool = False
if node_id in old_manifest.nodes:
old_node = old_manifest.nodes[node_id]
if old_node.original_file_path == source_path:
self.add_node(source_file, old_node)
found = True

if node_id in old_manifest._disabled:
matches = old_manifest._get_disabled(node_id, source_file)
for match in matches:
self.add_disabled(source_file, match)
found = True

if not found:
raise CompilationException(
'Expected to find "{}" in cached "manifest.nodes" or '
'"manifest.disabled" based on cached file information: {}!'
.format(node_id, old_file)
)

# This is called by ManifestLoader._get_cached/parse_with_cache,
# which handles updating the ManifestLoader results with information
# from the "old_manifest", i.e. the pickle file if the checksums are
# the same.
def sanitized_update(
self,
source_file: SourceFile,
old_manifest: Any,
resource_type: NodeType,
) -> bool:

if isinstance(source_file.path, RemoteFile):
return False

old_file = old_manifest.get_file(source_file)
for doc_id in old_file.docs:
doc = _expect_value(doc_id, old_manifest.docs, old_file, "docs")
self.add_doc(source_file, doc)

for macro_id in old_file.macros:
macro = _expect_value(
macro_id, old_manifest.macros, old_file, "macros"
)
self.add_macro(source_file, macro)

for source_id in old_file.sources:
source = _expect_value(
source_id, old_manifest.sources, old_file, "sources"
)
self.add_source(source_file, source)

# because we know this is how we _parsed_ the node, we can safely
# assume if it's disabled it was done by the project or file, and
# we can keep our old data
# the node ID could be in old_manifest.disabled AND in old_manifest.nodes.
# In that case, we have to make sure the path also matches.
for node_id in old_file.nodes:
# cheat: look at the first part of the node ID and compare it to
# the parser resource type. On a mismatch, bail out.
if resource_type != node_id.split('.')[0]:
continue
self._process_node(node_id, source_file, old_file, old_manifest)

for exposure_id in old_file.exposures:
exposure = _expect_value(
exposure_id, old_manifest.exposures, old_file, "exposures"
)
self.add_exposure(source_file, exposure)

# Note: There shouldn't be any patches in here after the cleanup.
# The pickled Manifest should have had all patches applied.
patched = False
for name in old_file.patches:
patch = _expect_value(
name, old_manifest.patches, old_file, "patches"
)
self.add_patch(source_file, patch)
patched = True
if patched:
self.get_file(source_file).patches.sort()

macro_patched = False
for key in old_file.macro_patches:
macro_patch = _expect_value(
key, old_manifest.macro_patches, old_file, "macro_patches"
)
self.add_macro_patch(source_file, macro_patch)
macro_patched = True
if macro_patched:
self.get_file(source_file).macro_patches.sort()

return True
# end of methods formerly in ParseResult

# Provide support for copy.deepcopy() - we just need to avoid the lock!
@@ -1189,13 +1068,11 @@ class Manifest(MacroMethods):
self.metadata,
self.flat_graph,
self.state_check,
self.macro_patches,
self.patches,
self.source_patches,
self._disabled,
self._docs_cache,
self._sources_cache,
self._refs_cache,
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
)
return self.__class__, args

@@ -1213,7 +1090,7 @@ AnyManifest = Union[Manifest, MacroManifest]


@dataclass
@schema_version('manifest', 1)
@schema_version('manifest', 2)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=(
@@ -1266,6 +1143,10 @@ def _check_duplicates(
raise_duplicate_resource_name(value, src[value.unique_id])


K_T = TypeVar('K_T')
V_T = TypeVar('V_T')


def _expect_value(
key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
) -> V_T:

@@ -2,14 +2,13 @@ from dataclasses import field, Field, dataclass
from enum import Enum
from itertools import chain
from typing import (
Any, List, Optional, Dict, MutableMapping, Union, Type,
TypeVar, Callable,
Any, List, Optional, Dict, Union, Type, TypeVar
)
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError, register_pattern,
)
from dbt.contracts.graph.unparsed import AdditionalPropertiesAllowed
from dbt.exceptions import CompilationException, InternalException
from dbt.exceptions import InternalException
from dbt.contracts.util import Replaceable, list_str
from dbt import hooks
from dbt.node_types import NodeType
@@ -182,53 +181,29 @@ T = TypeVar('T', bound='BaseConfig')

@dataclass
class BaseConfig(
AdditionalPropertiesAllowed, Replaceable, MutableMapping[str, Any]
AdditionalPropertiesAllowed, Replaceable
):
# Implement MutableMapping so this config will behave as some macros expect
# during parsing (notably, syntax like `{{ node.config['schema'] }}`)

# enable syntax like: config['key']
def __getitem__(self, key):
"""Handle parse-time use of `config` as a dictionary, making the extra
values available during parsing.
"""
return self.get(key)

# like doing 'get' on a dictionary
def get(self, key, default=None):
if hasattr(self, key):
return getattr(self, key)
else:
elif key in self._extra:
return self._extra[key]
else:
return default

# enable syntax like: config['key'] = value
def __setitem__(self, key, value):
if hasattr(self, key):
setattr(self, key, value)
else:
self._extra[key] = value

def __delitem__(self, key):
if hasattr(self, key):
msg = (
'Error, tried to delete config key "{}": Cannot delete '
'built-in keys'
).format(key)
raise CompilationException(msg)
else:
del self._extra[key]

def _content_iterator(self, include_condition: Callable[[Field], bool]):
seen = set()
for fld, _ in self._get_fields():
seen.add(fld.name)
if include_condition(fld):
yield fld.name

for key in self._extra:
if key not in seen:
seen.add(key)
yield key

def __iter__(self):
yield from self._content_iterator(include_condition=lambda f: True)

def __len__(self):
return len(self._get_fields()) + len(self._extra)

@staticmethod
def compare_key(
unrendered: Dict[str, Any],
@@ -436,8 +411,42 @@ class SeedConfig(NodeConfig):

@dataclass
class TestConfig(NodeConfig):
schema: Optional[str] = field(
default='dbt_test__audit',
metadata=CompareBehavior.Exclude.meta(),
)
materialized: str = 'test'
severity: Severity = Severity('ERROR')
store_failures: Optional[bool] = None
where: Optional[str] = None
limit: Optional[int] = None
fail_calc: str = 'count(*)'
warn_if: str = '!= 0'
error_if: str = '!= 0'

@classmethod
def same_contents(
cls, unrendered: Dict[str, Any], other: Dict[str, Any]
) -> bool:
"""This is like __eq__, except it explicitly checks certain fields."""
modifiers = [
'severity',
'where',
'limit',
'fail_calc',
'warn_if',
'error_if',
'store_failures'
]

seen = set()
for _, target_name in cls._get_fields():
key = target_name
seen.add(key)
if key in modifiers:
if not cls.compare_key(unrendered, other, key):
return False
return True


@dataclass
@@ -457,6 +466,11 @@ class SnapshotConfig(EmptySnapshotConfig):
@classmethod
def validate(cls, data):
super().validate(data)
if not data.get('strategy') or not data.get('unique_key') or not \
data.get('target_schema'):
raise ValidationError(
"Snapshots must be configured with a 'strategy', 'unique_key', "
"and 'target_schema'.")
if data.get('strategy') == 'check':
if not data.get('check_cols'):
raise ValidationError(

@@ -1,5 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from mashumaro.types import SerializableType
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Optional,
|
||||
@@ -115,6 +117,21 @@ class ParsedNodeMixins(dbtClassMixin):
|
||||
def is_refable(self):
|
||||
return self.resource_type in NodeType.refable()
|
||||
|
||||
@property
|
||||
def should_store_failures(self):
|
||||
return self.resource_type == NodeType.Test and (
|
||||
self.config.store_failures if self.config.store_failures is not None
|
||||
else flags.STORE_FAILURES
|
||||
)
|
||||
|
||||
# will this node map to an object in the database?
|
||||
@property
|
||||
def is_relational(self):
|
||||
return (
|
||||
self.resource_type in NodeType.refable() or
|
||||
self.should_store_failures
|
||||
)
|
||||
|
||||
@property
|
||||
def is_ephemeral(self):
|
||||
return self.config.materialized == 'ephemeral'
|
||||
@@ -131,7 +148,9 @@ class ParsedNodeMixins(dbtClassMixin):
|
||||
"""Given a ParsedNodePatch, add the new information to the node."""
|
||||
# explicitly pick out the parts to update so we don't inadvertently
|
||||
# step on the model name or anything
|
||||
self.patch_path: Optional[str] = patch.original_file_path
|
||||
self.patch_path: Optional[str] = patch.file_id
|
||||
# update created_at so process_docs will run in partial parsing
|
||||
self.created_at = int(time.time())
|
||||
self.description = patch.description
|
||||
self.columns = patch.columns
|
||||
self.meta = patch.meta
|
||||
@@ -179,9 +198,11 @@ class ParsedNodeDefaults(ParsedNodeMandatory):
    meta: Dict[str, Any] = field(default_factory=dict)
    docs: Docs = field(default_factory=Docs)
    patch_path: Optional[str] = None
    compiled_path: Optional[str] = None
    build_path: Optional[str] = None
    deferred: bool = False
    unrendered_config: Dict[str, Any] = field(default_factory=dict)
    created_at: int = field(default_factory=lambda: int(time.time()))

    def write_node(self, target_path: str, subdirectory: str, payload: str):
        if (os.path.basename(self.path) ==
@@ -203,7 +224,39 @@ T = TypeVar('T', bound='ParsedNode')


@dataclass
class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins):
class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):

    def _serialize(self):
        return self.to_dict()

    @classmethod
    def _deserialize(cls, dct: Dict[str, int]):
        # The serialized ParsedNodes do not differ from each other
        # in fields that would allow 'from_dict' to distinguish
        # between them.
        resource_type = dct['resource_type']
        if resource_type == 'model':
            return ParsedModelNode.from_dict(dct)
        elif resource_type == 'analysis':
            return ParsedAnalysisNode.from_dict(dct)
        elif resource_type == 'seed':
            return ParsedSeedNode.from_dict(dct)
        elif resource_type == 'rpc':
            return ParsedRPCNode.from_dict(dct)
        elif resource_type == 'test':
            if 'test_metadata' in dct:
                return ParsedSchemaTestNode.from_dict(dct)
            else:
                return ParsedDataTestNode.from_dict(dct)
        elif resource_type == 'operation':
            return ParsedHookNode.from_dict(dct)
        elif resource_type == 'seed':
            return ParsedSeedNode.from_dict(dct)
        elif resource_type == 'snapshot':
            return ParsedSnapshotNode.from_dict(dct)
        else:
            return cls.from_dict(dct)

    def _persist_column_docs(self) -> bool:
        return bool(self.config.persist_docs.get('columns'))

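The _deserialize branching above is a dispatch-on-a-type-field pattern: the 'resource_type' key in the serialized dict picks the concrete class to rebuild. A minimal standalone sketch of the same idea (hypothetical classes, not dbt's):

from dataclasses import dataclass


@dataclass
class ModelNode:
    name: str


@dataclass
class SeedNode:
    name: str


# Map the discriminator value to the class that should be constructed.
NODE_TYPES = {'model': ModelNode, 'seed': SeedNode}


def deserialize_node(dct: dict):
    cls = NODE_TYPES.get(dct['resource_type'], ModelNode)
    return cls(name=dct['name'])


print(deserialize_node({'resource_type': 'seed', 'name': 'raw_orders'}))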
@@ -368,15 +421,6 @@ class ParsedSchemaTestNode(ParsedNode, HasTestMetadata):
    column_name: Optional[str] = None
    config: TestConfig = field(default_factory=TestConfig)

    def same_config(self, other) -> bool:
        return (
            self.unrendered_config.get('severity') ==
            other.unrendered_config.get('severity')
        )

    def same_column_name(self, other) -> bool:
        return self.column_name == other.column_name

    def same_contents(self, other) -> bool:
        if other is None:
            return False
@@ -441,13 +485,15 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
    docs: Docs = field(default_factory=Docs)
    patch_path: Optional[str] = None
    arguments: List[MacroArgument] = field(default_factory=list)
    created_at: int = field(default_factory=lambda: int(time.time()))

    def local_vars(self):
        return {}

    def patch(self, patch: ParsedMacroPatch):
        self.patch_path: Optional[str] = patch.original_file_path
        self.patch_path: Optional[str] = patch.file_id
        self.description = patch.description
        self.created_at = int(time.time())
        self.meta = patch.meta
        self.docs = patch.docs
        self.arguments = patch.arguments
@@ -567,6 +613,7 @@ class ParsedSourceDefinition(
|
||||
patch_path: Optional[Path] = None
|
||||
unrendered_config: Dict[str, Any] = field(default_factory=dict)
|
||||
relation_name: Optional[str] = None
|
||||
created_at: int = field(default_factory=lambda: int(time.time()))
|
||||
|
||||
def same_database_representation(
|
||||
self, other: 'ParsedSourceDefinition'
|
||||
@@ -667,10 +714,13 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
|
||||
resource_type: NodeType = NodeType.Exposure
|
||||
description: str = ''
|
||||
maturity: Optional[MaturityType] = None
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
url: Optional[str] = None
|
||||
depends_on: DependsOn = field(default_factory=DependsOn)
|
||||
refs: List[List[str]] = field(default_factory=list)
|
||||
sources: List[List[str]] = field(default_factory=list)
|
||||
created_at: int = field(default_factory=lambda: int(time.time()))
|
||||
|
||||
@property
|
||||
def depends_on_nodes(self):
|
||||
@@ -680,11 +730,6 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
|
||||
def search_name(self):
|
||||
return self.name
|
||||
|
||||
# no tags for now, but we could definitely add them
|
||||
@property
|
||||
def tags(self):
|
||||
return []
|
||||
|
||||
def same_depends_on(self, old: 'ParsedExposure') -> bool:
|
||||
return set(self.depends_on.nodes) == set(old.depends_on.nodes)
|
||||
|
||||
@@ -705,6 +750,7 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
|
||||
|
||||
def same_contents(self, old: Optional['ParsedExposure']) -> bool:
|
||||
# existing when it didn't before is a change!
|
||||
# metadata/tags changes are not "changes"
|
||||
if old is None:
|
||||
return True
|
||||
|
||||
|
||||
@@ -25,6 +25,10 @@ class UnparsedBaseNode(dbtClassMixin, Replaceable):
|
||||
path: str
|
||||
original_file_path: str
|
||||
|
||||
@property
|
||||
def file_id(self):
|
||||
return f'{self.package_name}://{self.original_file_path}'
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasSQL:
|
||||
@@ -116,6 +120,10 @@ class HasYamlMetadata(dbtClassMixin):
|
||||
yaml_key: str
|
||||
package_name: str
|
||||
|
||||
@property
|
||||
def file_id(self):
|
||||
return f'{self.package_name}://{self.original_file_path}'
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedAnalysisUpdate(HasColumnDocs, HasDocs, HasYamlMetadata):
|
||||
@@ -347,6 +355,10 @@ class UnparsedDocumentation(dbtClassMixin, Replaceable):
|
||||
path: str
|
||||
original_file_path: str
|
||||
|
||||
@property
|
||||
def file_id(self):
|
||||
return f'{self.package_name}://{self.original_file_path}'
|
||||
|
||||
@property
|
||||
def resource_type(self):
|
||||
return NodeType.Documentation
|
||||
@@ -413,5 +425,7 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
|
||||
owner: ExposureOwner
|
||||
description: str = ''
|
||||
maturity: Optional[MaturityType] = None
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
url: Optional[str] = None
|
||||
depends_on: List[str] = field(default_factory=list)
|
||||
|
||||
@@ -70,6 +70,7 @@ class GitPackage(Package):
|
||||
git: str
|
||||
revision: Optional[RawVersion] = None
|
||||
warn_unpinned: Optional[bool] = None
|
||||
subdirectory: Optional[str] = None
|
||||
|
||||
def get_revisions(self) -> List[str]:
|
||||
if self.revision is None:
|
||||
@@ -190,6 +191,7 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
|
||||
on_run_start: Optional[List[str]] = field(default_factory=list_str)
|
||||
on_run_end: Optional[List[str]] = field(default_factory=list_str)
|
||||
require_dbt_version: Optional[Union[List[str], str]] = None
|
||||
dispatch: List[Dict[str, Any]] = field(default_factory=list)
|
||||
models: Dict[str, Any] = field(default_factory=dict)
|
||||
seeds: Dict[str, Any] = field(default_factory=dict)
|
||||
snapshots: Dict[str, Any] = field(default_factory=dict)
|
||||
@@ -212,6 +214,13 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
|
||||
raise ValidationError(
|
||||
f"Invalid project name: {data['name']} is a reserved word"
|
||||
)
|
||||
# validate dispatch config
|
||||
if 'dispatch' in data and data['dispatch']:
|
||||
entries = data['dispatch']
|
||||
for entry in entries:
|
||||
if ('macro_namespace' not in entry or 'search_order' not in entry or
|
||||
not isinstance(entry['search_order'], list)):
|
||||
raise ValidationError(f"Invalid project dispatch config: {entry}")
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -94,13 +94,16 @@ class BaseResult(dbtClassMixin):
    thread_id: str
    execution_time: float
    adapter_response: Dict[str, Any]
    message: Optional[Union[str, int]]
    message: Optional[str]
    failures: Optional[int]

    @classmethod
    def __pre_deserialize__(cls, data):
        data = super().__pre_deserialize__(data)
        if 'message' not in data:
            data['message'] = None
        if 'failures' not in data:
            data['failures'] = None
        return data

@@ -157,7 +160,8 @@ def process_run_result(result: RunResult) -> RunResultOutput:
        thread_id=result.thread_id,
        execution_time=result.execution_time,
        message=result.message,
        adapter_response=result.adapter_response
        adapter_response=result.adapter_response,
        failures=result.failures
    )

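The __pre_deserialize__ hook above backfills keys that older run results artifacts will not have, so deserializing pre-existing files keeps working. A standalone sketch of that backfilling step (illustrative only, outside dbt's mashumaro-based mixins):

def backfill_result_defaults(data: dict) -> dict:
    # Older artifacts were written before 'message' and 'failures' existed;
    # give them explicit None values so the new schema can load them.
    for key in ('message', 'failures'):
        data.setdefault(key, None)
    return data


old_row = {'thread_id': 'Thread-1', 'execution_time': 0.12, 'adapter_response': {}}
print(backfill_result_defaults(old_row))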
@@ -180,7 +184,7 @@ class RunExecutionResult(
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('run-results', 1)
|
||||
@schema_version('run-results', 2)
|
||||
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
|
||||
results: Sequence[RunResultOutput]
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
@@ -378,6 +382,7 @@ class FreshnessExecutionResultArtifact(
|
||||
|
||||
|
||||
Primitive = Union[bool, str, float, None]
|
||||
PrimitiveDict = Dict[str, Primitive]
|
||||
|
||||
CatalogKey = NamedTuple(
|
||||
'CatalogKey',
|
||||
|
||||
@@ -63,6 +63,16 @@ class RPCCompileParameters(RPCParameters):
|
||||
state: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class RPCListParameters(RPCParameters):
|
||||
resource_types: Optional[List[str]] = None
|
||||
models: Union[None, str, List[str]] = None
|
||||
exclude: Union[None, str, List[str]] = None
|
||||
select: Union[None, str, List[str]] = None
|
||||
selector: Optional[str] = None
|
||||
output: Optional[str] = 'json'
|
||||
|
||||
|
||||
@dataclass
|
||||
class RPCRunParameters(RPCParameters):
|
||||
threads: Optional[int] = None
|
||||
@@ -190,6 +200,13 @@ class RemoteResult(VersionedSchema):
|
||||
logs: List[LogMessage]
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('remote-list-results', 1)
|
||||
class RemoteListResults(RemoteResult):
|
||||
output: List[Any]
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('remote-deps-result', 1)
|
||||
class RemoteDepsResult(RemoteResult):
|
||||
|
||||
@@ -14,7 +14,6 @@ from dbt.version import __version__
|
||||
from dbt.tracking import get_invocation_id
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
MacroKey = Tuple[str, str]
|
||||
SourceKey = Tuple[str, str]
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,20 @@ class DBTDeprecation:
        active_deprecations.add(self.name)


class DispatchPackagesDeprecation(DBTDeprecation):
    _name = 'dispatch-packages'
    _description = '''\
    The "packages" argument of adapter.dispatch() has been deprecated.
    Use the "macro_namespace" argument instead.

    Raised during dispatch for: {macro_name}

    For more information, see:

    https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch
    '''


class MaterializationReturnDeprecation(DBTDeprecation):
    _name = 'materialization-return'

@@ -155,6 +169,7 @@ def warn(name, *args, **kwargs):
active_deprecations: Set[str] = set()

deprecations_list: List[DBTDeprecation] = [
    DispatchPackagesDeprecation(),
    MaterializationReturnDeprecation(),
    NotADictionaryDeprecation(),
    ColumnQuotingDeprecation(),
@@ -93,6 +93,9 @@ class PinnedPackage(BasePackage):
|
||||
dest_dirname = self.get_project_name(project, renderer)
|
||||
return os.path.join(project.modules_path, dest_dirname)
|
||||
|
||||
def get_subdirectory(self):
|
||||
return None
|
||||
|
||||
|
||||
SomePinned = TypeVar('SomePinned', bound=PinnedPackage)
|
||||
SomeUnpinned = TypeVar('SomeUnpinned', bound='UnpinnedPackage')
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
import hashlib
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from dbt.clients import git, system
|
||||
from dbt.config import Project
|
||||
@@ -37,16 +37,24 @@ class GitPackageMixin:
|
||||
|
||||
class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
def __init__(
|
||||
self, git: str, revision: str, warn_unpinned: bool = True
|
||||
self,
|
||||
git: str,
|
||||
revision: str,
|
||||
warn_unpinned: bool = True,
|
||||
subdirectory: Optional[str] = None,
|
||||
) -> None:
|
||||
super().__init__(git)
|
||||
self.revision = revision
|
||||
self.warn_unpinned = warn_unpinned
|
||||
self.subdirectory = subdirectory
|
||||
self._checkout_name = md5sum(self.git)
|
||||
|
||||
def get_version(self):
|
||||
return self.revision
|
||||
|
||||
def get_subdirectory(self):
|
||||
return self.subdirectory
|
||||
|
||||
def nice_version_name(self):
|
||||
if self.revision == 'HEAD':
|
||||
return 'HEAD (default revision)'
|
||||
@@ -69,7 +77,7 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
try:
|
||||
dir_ = git.clone_and_checkout(
|
||||
self.git, get_downloads_path(), revision=self.revision,
|
||||
dirname=self._checkout_name
|
||||
dirname=self._checkout_name, subdirectory=self.subdirectory
|
||||
)
|
||||
except ExecutableError as exc:
|
||||
if exc.cmd and exc.cmd[0] == 'git':
|
||||
@@ -107,11 +115,16 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
|
||||
class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
|
||||
def __init__(
|
||||
self, git: str, revisions: List[str], warn_unpinned: bool = True
|
||||
self,
|
||||
git: str,
|
||||
revisions: List[str],
|
||||
warn_unpinned: bool = True,
|
||||
subdirectory: Optional[str] = None,
|
||||
) -> None:
|
||||
super().__init__(git)
|
||||
self.revisions = revisions
|
||||
self.warn_unpinned = warn_unpinned
|
||||
self.subdirectory = subdirectory
|
||||
|
||||
@classmethod
|
||||
def from_contract(
|
||||
@@ -122,7 +135,7 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
|
||||
# we want to map None -> True
|
||||
warn_unpinned = contract.warn_unpinned is not False
|
||||
return cls(git=contract.git, revisions=revisions,
|
||||
warn_unpinned=warn_unpinned)
|
||||
warn_unpinned=warn_unpinned, subdirectory=contract.subdirectory)
|
||||
|
||||
def all_names(self) -> List[str]:
|
||||
if self.git.endswith('.git'):
|
||||
@@ -140,6 +153,7 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
|
||||
git=self.git,
|
||||
revisions=self.revisions + other.revisions,
|
||||
warn_unpinned=warn_unpinned,
|
||||
subdirectory=self.subdirectory,
|
||||
)
|
||||
|
||||
def resolved(self) -> GitPinnedPackage:
|
||||
@@ -153,5 +167,5 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
|
||||
|
||||
return GitPinnedPackage(
|
||||
git=self.git, revision=requested.pop(),
|
||||
warn_unpinned=self.warn_unpinned
|
||||
warn_unpinned=self.warn_unpinned, subdirectory=self.subdirectory
|
||||
)
|
||||
|
||||
@@ -289,6 +289,15 @@ class JinjaRenderingException(CompilationException):
|
||||
pass
|
||||
|
||||
|
||||
class UndefinedMacroException(CompilationException):
|
||||
|
||||
def __str__(self, prefix='! ') -> str:
|
||||
msg = super().__str__(prefix)
|
||||
return f'{msg}. This can happen when calling a macro that does ' \
|
||||
'not exist. Check for typos and/or install package dependencies ' \
|
||||
'with "dbt deps".'
|
||||
|
||||
|
||||
class UnknownAsyncIDException(Exception):
|
||||
CODE = 10012
|
||||
MESSAGE = 'RPC server got an unknown async ID'
|
||||
@@ -845,11 +854,11 @@ def _fix_dupe_msg(path_1: str, path_2: str, name: str, type_name: str) -> str:
|
||||
)
|
||||
|
||||
|
||||
def raise_duplicate_patch_name(patch_1, patch_2):
|
||||
def raise_duplicate_patch_name(patch_1, existing_patch_path):
|
||||
name = patch_1.name
|
||||
fix = _fix_dupe_msg(
|
||||
patch_1.original_file_path,
|
||||
patch_2.original_file_path,
|
||||
existing_patch_path,
|
||||
name,
|
||||
'resource',
|
||||
)
|
||||
@@ -860,12 +869,12 @@ def raise_duplicate_patch_name(patch_1, patch_2):
|
||||
)
|
||||
|
||||
|
||||
def raise_duplicate_macro_patch_name(patch_1, patch_2):
|
||||
def raise_duplicate_macro_patch_name(patch_1, existing_patch_path):
|
||||
package_name = patch_1.package_name
|
||||
name = patch_1.name
|
||||
fix = _fix_dupe_msg(
|
||||
patch_1.original_file_path,
|
||||
patch_2.original_file_path,
|
||||
existing_patch_path,
|
||||
name,
|
||||
'macros'
|
||||
)
|
||||
|
||||
@@ -13,9 +13,11 @@ FULL_REFRESH = None
|
||||
USE_CACHE = None
|
||||
WARN_ERROR = None
|
||||
TEST_NEW_PARSER = None
|
||||
USE_EXPERIMENTAL_PARSER = None
|
||||
WRITE_JSON = None
|
||||
PARTIAL_PARSE = None
|
||||
USE_COLORS = None
|
||||
STORE_FAILURES = None
|
||||
|
||||
|
||||
def env_set_truthy(key: str) -> Optional[str]:
|
||||
@@ -53,22 +55,26 @@ MP_CONTEXT = _get_context()
|
||||
|
||||
def reset():
|
||||
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
|
||||
WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS
|
||||
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
|
||||
STORE_FAILURES
|
||||
|
||||
STRICT_MODE = False
|
||||
FULL_REFRESH = False
|
||||
USE_CACHE = True
|
||||
WARN_ERROR = False
|
||||
TEST_NEW_PARSER = False
|
||||
USE_EXPERIMENTAL_PARSER = False
|
||||
WRITE_JSON = True
|
||||
PARTIAL_PARSE = False
|
||||
MP_CONTEXT = _get_context()
|
||||
USE_COLORS = True
|
||||
STORE_FAILURES = False
|
||||
|
||||
|
||||
def set_from_args(args):
|
||||
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
|
||||
WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS
|
||||
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
|
||||
STORE_FAILURES
|
||||
|
||||
USE_CACHE = getattr(args, 'use_cache', USE_CACHE)
|
||||
|
||||
@@ -80,6 +86,7 @@ def set_from_args(args):
|
||||
)
|
||||
|
||||
TEST_NEW_PARSER = getattr(args, 'test_new_parser', TEST_NEW_PARSER)
|
||||
USE_EXPERIMENTAL_PARSER = getattr(args, 'use_experimental_parser', USE_EXPERIMENTAL_PARSER)
|
||||
WRITE_JSON = getattr(args, 'write_json', WRITE_JSON)
|
||||
PARTIAL_PARSE = getattr(args, 'partial_parse', None)
|
||||
MP_CONTEXT = _get_context()
|
||||
@@ -91,6 +98,8 @@ def set_from_args(args):
|
||||
if use_colors_override is not None:
|
||||
USE_COLORS = use_colors_override
|
||||
|
||||
STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
|
||||
|
||||
|
||||
# initialize everything to the defaults on module load
|
||||
reset()
|
||||
|
||||
@@ -26,7 +26,7 @@ SCHEMA_TEST_SELECTOR: str = 'test_type:schema'


def parse_union(
    components: List[str], expect_exists: bool
    components: List[str], expect_exists: bool, greedy: bool = False
) -> SelectionUnion:
    # turn ['a b', 'c'] -> ['a', 'b', 'c']
    raw_specs = itertools.chain.from_iterable(
@@ -37,7 +37,7 @@ def parse_union(
    # ['a', 'b', 'c,d'] -> union('a', 'b', intersection('c', 'd'))
    for raw_spec in raw_specs:
        intersection_components: List[SelectionSpec] = [
            SelectionCriteria.from_single_spec(part)
            SelectionCriteria.from_single_spec(part, greedy=greedy)
            for part in raw_spec.split(INTERSECTION_DELIMITER)
        ]
        union_components.append(SelectionIntersection(
@@ -45,7 +45,6 @@ def parse_union(
            expect_exists=expect_exists,
            raw=raw_spec,
        ))

    return SelectionUnion(
        components=union_components,
        expect_exists=False,
@@ -54,21 +53,21 @@ def parse_union(


def parse_union_from_default(
    raw: Optional[List[str]], default: List[str]
    raw: Optional[List[str]], default: List[str], greedy: bool = False
) -> SelectionUnion:
    components: List[str]
    expect_exists: bool
    if raw is None:
        return parse_union(components=default, expect_exists=False)
        return parse_union(components=default, expect_exists=False, greedy=greedy)
    else:
        return parse_union(components=raw, expect_exists=True)
        return parse_union(components=raw, expect_exists=True, greedy=greedy)


def parse_difference(
    include: Optional[List[str]], exclude: Optional[List[str]]
) -> SelectionDifference:
    included = parse_union_from_default(include, DEFAULT_INCLUDES)
    excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES)
    excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
    return SelectionDifference(components=[included, excluded])

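The two comments in parse_union above describe how raw selection arguments are flattened: space-separated pieces become union members, and comma-separated pieces inside one member become an intersection. A standalone sketch of that splitting step (plain strings and tuples, not dbt's SelectionCriteria objects):

import itertools


def split_selection_args(components):
    # 'a b' and 'c,d' -> union members ['a', 'b', ('c', 'd')], where the tuple
    # stands for an intersection inside the union.
    raw_specs = itertools.chain.from_iterable(c.split(' ') for c in components)
    return [tuple(raw_spec.split(',')) for raw_spec in raw_specs]


print(split_selection_args(['a b', 'c,d']))  # [('a',), ('b',), ('c', 'd')]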
@@ -1,5 +1,5 @@

from typing import Set, List, Optional
from typing import Set, List, Optional, Tuple

from .graph import Graph, UniqueId
from .queue import GraphQueue
@@ -30,6 +30,18 @@ def alert_non_existence(raw_spec, nodes):
    )


def can_select_indirectly(node):
    """If a node is not selected itself, but its parent(s) are, it may qualify
    for indirect selection.
    Today, only Test nodes can be indirectly selected. In the future,
    other node types or invocation flags might qualify.
    """
    if node.resource_type == NodeType.Test:
        return True
    else:
        return False


class NodeSelector(MethodManager):
    """The node selector is aware of the graph and manifest,
    """
@@ -61,8 +73,8 @@ class NodeSelector(MethodManager):
|
||||
|
||||
def get_nodes_from_criteria(
|
||||
self,
|
||||
spec: SelectionCriteria,
|
||||
) -> Set[UniqueId]:
|
||||
spec: SelectionCriteria
|
||||
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||||
"""Get all nodes specified by the single selection criteria.
|
||||
|
||||
- collect the directly included nodes
|
||||
@@ -79,11 +91,14 @@ class NodeSelector(MethodManager):
|
||||
f"The '{spec.method}' selector specified in {spec.raw} is "
|
||||
f"invalid. Must be one of [{valid_selectors}]"
|
||||
)
|
||||
return set()
|
||||
return set(), set()
|
||||
|
||||
extras = self.collect_specified_neighbors(spec, collected)
|
||||
result = self.expand_selection(collected | extras)
|
||||
return result
|
||||
neighbors = self.collect_specified_neighbors(spec, collected)
|
||||
direct_nodes, indirect_nodes = self.expand_selection(
|
||||
selected=(collected | neighbors),
|
||||
greedy=spec.greedy
|
||||
)
|
||||
return direct_nodes, indirect_nodes
|
||||
|
||||
def collect_specified_neighbors(
|
||||
self, spec: SelectionCriteria, selected: Set[UniqueId]
|
||||
@@ -106,24 +121,46 @@ class NodeSelector(MethodManager):
|
||||
additional.update(self.graph.select_children(selected, depth))
|
||||
return additional
|
||||
|
||||
def select_nodes(self, spec: SelectionSpec) -> Set[UniqueId]:
|
||||
"""Select the nodes in the graph according to the spec.
|
||||
|
||||
If the spec is a composite spec (a union, difference, or intersection),
|
||||
def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||||
"""If the spec is a composite spec (a union, difference, or intersection),
|
||||
recurse into its selections and combine them. If the spec is a concrete
|
||||
selection criteria, resolve that using the given graph.
|
||||
"""
|
||||
if isinstance(spec, SelectionCriteria):
|
||||
result = self.get_nodes_from_criteria(spec)
|
||||
direct_nodes, indirect_nodes = self.get_nodes_from_criteria(spec)
|
||||
else:
|
||||
node_selections = [
|
||||
self.select_nodes(component)
|
||||
bundles = [
|
||||
self.select_nodes_recursively(component)
|
||||
for component in spec
|
||||
]
|
||||
result = spec.combined(node_selections)
|
||||
|
||||
direct_sets = []
|
||||
indirect_sets = []
|
||||
|
||||
for direct, indirect in bundles:
|
||||
direct_sets.append(direct)
|
||||
indirect_sets.append(direct | indirect)
|
||||
|
||||
initial_direct = spec.combined(direct_sets)
|
||||
indirect_nodes = spec.combined(indirect_sets)
|
||||
|
||||
direct_nodes = self.incorporate_indirect_nodes(initial_direct, indirect_nodes)
|
||||
|
||||
if spec.expect_exists:
|
||||
alert_non_existence(spec.raw, result)
|
||||
return result
|
||||
alert_non_existence(spec.raw, direct_nodes)
|
||||
|
||||
return direct_nodes, indirect_nodes
|
||||
|
||||
def select_nodes(self, spec: SelectionSpec) -> Set[UniqueId]:
|
||||
"""Select the nodes in the graph according to the spec.
|
||||
|
||||
This is the main point of entry for turning a spec into a set of nodes:
|
||||
- Recurse through spec, select by criteria, combine by set operation
|
||||
- Return final (unfiltered) selection set
|
||||
"""
|
||||
|
||||
direct_nodes, indirect_nodes = self.select_nodes_recursively(spec)
|
||||
return direct_nodes
|
||||
|
||||
def _is_graph_member(self, unique_id: UniqueId) -> bool:
|
||||
if unique_id in self.manifest.sources:
|
||||
@@ -162,12 +199,55 @@ class NodeSelector(MethodManager):
            unique_id for unique_id in selected if self._is_match(unique_id)
        }

    def expand_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
        """Perform selector-specific expansion."""
    def expand_selection(
        self, selected: Set[UniqueId], greedy: bool = False
    ) -> Tuple[Set[UniqueId], Set[UniqueId]]:
        # Test selection can expand to include an implicitly/indirectly selected test.
        # In this way, `dbt test -m model_a` also includes tests that directly depend on `model_a`.
        # Expansion has two modes, GREEDY and NOT GREEDY.
        #
        # GREEDY mode: If ANY parent is selected, select the test. We use this for EXCLUSION.
        #
        # NOT GREEDY mode:
        #  - If ALL parents are selected, select the test.
        #  - If ANY parent is missing, return it separately. We'll keep it around
        #    for later and see if its other parents show up.
        # We use this for INCLUSION.

        direct_nodes = set(selected)
        indirect_nodes = set()

        for unique_id in self.graph.select_successors(selected):
            if unique_id in self.manifest.nodes:
                node = self.manifest.nodes[unique_id]
                if can_select_indirectly(node):
                    # should we add it in directly?
                    if greedy or set(node.depends_on.nodes) <= set(selected):
                        direct_nodes.add(unique_id)
                    # if not:
                    else:
                        indirect_nodes.add(unique_id)

        return direct_nodes, indirect_nodes

    def incorporate_indirect_nodes(
        self, direct_nodes: Set[UniqueId], indirect_nodes: Set[UniqueId] = set()
    ) -> Set[UniqueId]:
        # Check tests previously selected indirectly to see if ALL their
        # parents are now present.

        selected = set(direct_nodes)

        for unique_id in indirect_nodes:
            if unique_id in self.manifest.nodes:
                node = self.manifest.nodes[unique_id]
                if set(node.depends_on.nodes) <= set(selected):
                    selected.add(unique_id)

        return selected

    def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:
        """get_selected runs trhough the node selection process:
        """get_selected runs through the node selection process:

        - node selection. Based on the include/exclude sets, the set
          of matched unique IDs is returned

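The GREEDY / NOT GREEDY comment above is the heart of the indirect test selection change. A standalone sketch of the two modes over plain sets (hypothetical node names; the graph lookup is replaced by a dict of test -> parent models):

# Each test maps to the set of models it depends on.
TEST_PARENTS = {
    'test.not_null_a': {'model.a'},
    'test.relationships_a_b': {'model.a', 'model.b'},
}


def expand_tests(selected, greedy=False):
    direct, indirect = set(selected), set()
    for test, parents in TEST_PARENTS.items():
        if greedy and parents & selected:
            direct.add(test)    # EXCLUSION: any selected parent is enough
        elif parents <= selected:
            direct.add(test)    # INCLUSION: all parents selected
        elif parents & selected:
            indirect.add(test)  # kept around in case the other parents show up
    return direct, indirect


print(expand_tests({'model.a'}))               # relationships test is only indirect
print(expand_tests({'model.a'}, greedy=True))  # greedy mode pulls it in directly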
@@ -49,25 +49,23 @@ class MethodName(StrEnum):
    Exposure = 'exposure'


def is_selected_node(real_node, node_selector):
    for i, selector_part in enumerate(node_selector):
def is_selected_node(fqn: List[str], node_selector: str):

        is_last = (i == len(node_selector) - 1)
    # If qualified_name exactly matches model name (fqn's leaf), return True
    if fqn[-1] == node_selector:
        return True
    # Flatten node parts. Dots in model names act as namespace separators
    flat_fqn = [item for segment in fqn for item in segment.split('.')]
    # Selector components cannot be more than fqn's
    if len(flat_fqn) < len(node_selector.split('.')):
        return False

    for i, selector_part in enumerate(node_selector.split('.')):
        # if we hit a GLOB, then this node is selected
        if selector_part == SELECTOR_GLOB:
            return True

        # match package.node_name or package.dir.node_name
        elif is_last and selector_part == real_node[-1]:
            return True

        elif len(real_node) <= i:
            return False

        elif real_node[i] == selector_part:
        elif flat_fqn[i] == selector_part:
            continue

        else:
            return False

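A quick usage sketch of the rewritten is_selected_node above (hypothetical values; the fqn is a node's fully qualified name and the selector is the dotted name a user typed):

# Hypothetical fqn for a model at models/staging/stg_orders.sql in package 'jaffle_shop'.
fqn = ['jaffle_shop', 'staging', 'stg_orders']

print(is_selected_node(fqn, 'stg_orders'))             # True: matches the fqn's leaf
print(is_selected_node(fqn, 'jaffle_shop.staging.*'))  # True: glob after a matching prefix
print(is_selected_node(fqn, 'staging.stg_orders'))     # False here; the calling method retries
                                                       # against fqn[1:] without the package name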
@@ -154,31 +152,20 @@ class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
|
||||
|
||||
class QualifiedNameSelectorMethod(SelectorMethod):
|
||||
def node_is_match(
|
||||
self,
|
||||
qualified_name: List[str],
|
||||
package_names: Set[str],
|
||||
fqn: List[str],
|
||||
) -> bool:
|
||||
"""Determine if a qualfied name matches an fqn, given the set of package
|
||||
def node_is_match(self, qualified_name: str, fqn: List[str]) -> bool:
|
||||
"""Determine if a qualified name matches an fqn for all package
|
||||
names in the graph.
|
||||
|
||||
:param List[str] qualified_name: The components of the selector or node
|
||||
name, split on '.'.
|
||||
:param Set[str] package_names: The set of pacakge names in the graph.
|
||||
:param str qualified_name: The qualified name to match the nodes with
|
||||
:param List[str] fqn: The node's fully qualified name in the graph.
|
||||
"""
|
||||
if len(qualified_name) == 1 and fqn[-1] == qualified_name[0]:
|
||||
unscoped_fqn = fqn[1:]
|
||||
|
||||
if is_selected_node(fqn, qualified_name):
|
||||
return True
|
||||
# Match nodes across different packages
|
||||
elif is_selected_node(unscoped_fqn, qualified_name):
|
||||
return True
|
||||
|
||||
if qualified_name[0] in package_names:
|
||||
if is_selected_node(fqn, qualified_name):
|
||||
return True
|
||||
|
||||
for package_name in package_names:
|
||||
local_qualified_node_name = [package_name] + qualified_name
|
||||
if is_selected_node(fqn, local_qualified_node_name):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@@ -189,15 +176,9 @@ class QualifiedNameSelectorMethod(SelectorMethod):
|
||||
|
||||
:param str selector: The selector or node name
|
||||
"""
|
||||
qualified_name = selector.split(".")
|
||||
parsed_nodes = list(self.parsed_nodes(included_nodes))
|
||||
package_names = {n.package_name for _, n in parsed_nodes}
|
||||
for node, real_node in parsed_nodes:
|
||||
if self.node_is_match(
|
||||
qualified_name,
|
||||
package_names,
|
||||
real_node.fqn,
|
||||
):
|
||||
if self.node_is_match(selector, real_node.fqn):
|
||||
yield node
|
||||
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@ class SelectionCriteria:
|
||||
parents_depth: Optional[int]
|
||||
children: bool
|
||||
children_depth: Optional[int]
|
||||
greedy: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
if self.children and self.childrens_parents:
|
||||
@@ -103,7 +104,7 @@ class SelectionCriteria:
|
||||
|
||||
@classmethod
|
||||
def selection_criteria_from_dict(
|
||||
cls, raw: Any, dct: Dict[str, Any]
|
||||
cls, raw: Any, dct: Dict[str, Any], greedy: bool = False
|
||||
) -> 'SelectionCriteria':
|
||||
if 'value' not in dct:
|
||||
raise RuntimeException(
|
||||
@@ -123,10 +124,11 @@ class SelectionCriteria:
|
||||
parents_depth=parents_depth,
|
||||
children=bool(dct.get('children')),
|
||||
children_depth=children_depth,
|
||||
greedy=greedy
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def dict_from_single_spec(cls, raw: str):
|
||||
def dict_from_single_spec(cls, raw: str, greedy: bool = False):
|
||||
result = RAW_SELECTOR_PATTERN.match(raw)
|
||||
if result is None:
|
||||
return {'error': 'Invalid selector spec'}
|
||||
@@ -146,13 +148,13 @@ class SelectionCriteria:
|
||||
return dct
|
||||
|
||||
@classmethod
|
||||
def from_single_spec(cls, raw: str) -> 'SelectionCriteria':
|
||||
def from_single_spec(cls, raw: str, greedy: bool = False) -> 'SelectionCriteria':
|
||||
result = RAW_SELECTOR_PATTERN.match(raw)
|
||||
if result is None:
|
||||
# bad spec!
|
||||
raise RuntimeException(f'Invalid selector spec "{raw}"')
|
||||
|
||||
return cls.selection_criteria_from_dict(raw, result.groupdict())
|
||||
return cls.selection_criteria_from_dict(raw, result.groupdict(), greedy=greedy)
|
||||
|
||||
|
||||
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
|
||||
|
||||
@@ -51,6 +51,29 @@
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% macro get_create_index_sql(relation, index_dict) -%}
|
||||
{{ return(adapter.dispatch('get_create_index_sql')(relation, index_dict)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__get_create_index_sql(relation, index_dict) -%}
|
||||
{% do return(None) %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro create_indexes(relation) -%}
|
||||
{{ adapter.dispatch('create_indexes')(relation) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__create_indexes(relation) -%}
|
||||
{%- set _indexes = config.get('indexes', default=[]) -%}
|
||||
|
||||
{% for _index_dict in _indexes %}
|
||||
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
|
||||
{% if create_index_sql %}
|
||||
{% do run_query(create_index_sql) %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro create_view_as(relation, sql) -%}
|
||||
{{ adapter.dispatch('create_view_as')(relation, sql) }}
|
||||
{%- endmacro %}
|
||||
|
||||
@@ -72,3 +72,12 @@
|
||||
{% endif %}
|
||||
{% do return(config_full_refresh) %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro should_store_failures() %}
|
||||
{% set config_store_failures = config.get('store_failures') %}
|
||||
{% if config_store_failures is none %}
|
||||
{% set config_store_failures = flags.STORE_FAILURES %}
|
||||
{% endif %}
|
||||
{% do return(config_store_failures) %}
|
||||
{% endmacro %}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
{% set target_relation = this.incorporate(type='table') %}
|
||||
{% set existing_relation = load_relation(this) %}
|
||||
{% set tmp_relation = make_temp_relation(this) %}
|
||||
|
||||
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
||||
|
||||
@@ -17,12 +16,17 @@
|
||||
{% set build_sql = create_table_as(False, target_relation, sql) %}
|
||||
{% elif existing_relation.is_view or should_full_refresh() %}
|
||||
{#-- Make sure the backup doesn't exist so we don't encounter issues with the rename below #}
|
||||
{% set backup_identifier = existing_relation.identifier ~ "__dbt_backup" %}
|
||||
{% set tmp_identifier = model['name'] + '__dbt_tmp' %}
|
||||
{% set backup_identifier = model['name'] + "__dbt_backup" %}
|
||||
|
||||
{% set intermediate_relation = existing_relation.incorporate(path={"identifier": tmp_identifier}) %}
|
||||
{% set backup_relation = existing_relation.incorporate(path={"identifier": backup_identifier}) %}
|
||||
|
||||
{% do adapter.drop_relation(intermediate_relation) %}
|
||||
{% do adapter.drop_relation(backup_relation) %}
|
||||
|
||||
{% do adapter.rename_relation(target_relation, backup_relation) %}
|
||||
{% set build_sql = create_table_as(False, target_relation, sql) %}
|
||||
{% set build_sql = create_table_as(False, intermediate_relation, sql) %}
|
||||
{% set need_swap = true %}
|
||||
{% do to_drop.append(backup_relation) %}
|
||||
{% else %}
|
||||
{% set tmp_relation = make_temp_relation(target_relation) %}
|
||||
@@ -37,8 +41,17 @@
|
||||
{{ build_sql }}
|
||||
{% endcall %}
|
||||
|
||||
{% if need_swap %}
|
||||
{% do adapter.rename_relation(target_relation, backup_relation) %}
|
||||
{% do adapter.rename_relation(intermediate_relation, target_relation) %}
|
||||
{% endif %}
|
||||
|
||||
{% do persist_docs(target_relation, model) %}
|
||||
|
||||
{% if existing_relation is none or existing_relation.is_view or should_full_refresh() %}
|
||||
{% do create_indexes(target_relation) %}
|
||||
{% endif %}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
-- `COMMIT` happens here
|
||||
|
||||
@@ -142,6 +142,10 @@
|
||||
{% set target_relation = this.incorporate(type='table') %}
|
||||
{% do persist_docs(target_relation, model) %}
|
||||
|
||||
{% if full_refresh_mode or not exists_as_table %}
|
||||
{% do create_indexes(target_relation) %}
|
||||
{% endif %}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
-- `COMMIT` happens here
|
||||
|
||||
@@ -263,6 +263,10 @@
|
||||
|
||||
{% do persist_docs(target_relation, model) %}
|
||||
|
||||
{% if not target_relation_exists %}
|
||||
{% do create_indexes(target_relation) %}
|
||||
{% endif %}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
{{ adapter.commit() }}
|
||||
|
||||
@@ -144,7 +144,7 @@
|
||||
{% if now is none or now is undefined -%}
|
||||
{%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%}
|
||||
{%- endif %}
|
||||
{% set updated_at = snapshot_string_as_time(now) %}
|
||||
{% set updated_at = config.get('updated_at', snapshot_string_as_time(now)) %}
|
||||
|
||||
{% set column_added = false %}
|
||||
|
||||
|
||||
@@ -47,6 +47,8 @@
|
||||
|
||||
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
|
||||
|
||||
{% do create_indexes(target_relation) %}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
{% do persist_docs(target_relation, model) %}
|
||||
|
||||
@@ -1,10 +1,55 @@
{%- materialization test, default -%}

  {% set relations = [] %}

  {% if should_store_failures() %}

    {% set identifier = model['alias'] %}
    {% set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
    {% set target_relation = api.Relation.create(
        identifier=identifier, schema=schema, database=database, type='table') -%} %}

    {% if old_relation %}
        {% do adapter.drop_relation(old_relation) %}
    {% endif %}

    {% call statement(auto_begin=True) %}
        {{ create_table_as(False, target_relation, sql) }}
    {% endcall %}

    {% do relations.append(target_relation) %}

    {% set main_sql %}
        select *
        from {{ target_relation }}
    {% endset %}

    {{ adapter.commit() }}

  {% else %}

      {% set main_sql = sql %}

  {% endif %}

  {% set limit = config.get('limit') %}
  {% set fail_calc = config.get('fail_calc') %}
  {% set warn_if = config.get('warn_if') %}
  {% set error_if = config.get('error_if') %}

  {% call statement('main', fetch_result=True) -%}
    select count(*) as validation_errors

    select
      {{ fail_calc }} as failures,
      {{ fail_calc }} {{ warn_if }} as should_warn,
      {{ fail_calc }} {{ error_if }} as should_error
    from (
      {{ sql }}
    ) _dbt_internal_test
      {{ main_sql }}
      {{ "limit " ~ limit if limit != none }}
    ) dbt_internal_test

  {%- endcall %}

  {{ return({'relations': relations}) }}

{%- endmaterialization -%}

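The main statement above returns a single row with failures, should_warn and should_error, computed from the configured fail_calc, warn_if and error_if expressions. A rough Python sketch of how such a row could be turned into a test status (hypothetical helper, not dbt's actual result handling):

def status_from_test_row(failures: int, should_warn: bool, should_error: bool) -> str:
    # error_if takes precedence over warn_if; anything else passes.
    if should_error:
        return 'error'
    if should_warn:
        return 'warn'
    return 'pass'


# With the defaults warn_if='!= 0' and error_if='!= 0', any failing row errors.
print(status_from_test_row(failures=3, should_warn=True, should_error=True))    # error
print(status_from_test_row(failures=0, should_warn=False, should_error=False))  # pass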
@@ -1,6 +1,6 @@
|
||||
|
||||
{% macro handle_existing_table(full_refresh, old_relation) %}
|
||||
{{ adapter.dispatch("handle_existing_table", packages=['dbt'])(full_refresh, old_relation) }}
|
||||
{{ adapter.dispatch('handle_existing_table', macro_namespace = 'dbt')(full_refresh, old_relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__handle_existing_table(full_refresh, old_relation) %}
|
||||
|
||||
@@ -1,42 +1,33 @@
|
||||
|
||||
{% macro default__test_accepted_values(model, values) %}
|
||||
|
||||
{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}
|
||||
{% set quote_values = kwargs.get('quote', True) %}
|
||||
{% macro default__test_accepted_values(model, column_name, values, quote=True) %}
|
||||
|
||||
with all_values as (
|
||||
|
||||
select distinct
|
||||
{{ column_name }} as value_field
|
||||
select
|
||||
{{ column_name }} as value_field,
|
||||
count(*) as n_records
|
||||
|
||||
from {{ model }}
|
||||
group by 1
|
||||
|
||||
),
|
||||
|
||||
validation_errors as (
|
||||
|
||||
select
|
||||
value_field
|
||||
|
||||
from all_values
|
||||
where value_field not in (
|
||||
{% for value in values -%}
|
||||
{% if quote_values -%}
|
||||
'{{ value }}'
|
||||
{%- else -%}
|
||||
{{ value }}
|
||||
{%- endif -%}
|
||||
{%- if not loop.last -%},{%- endif %}
|
||||
{%- endfor %}
|
||||
)
|
||||
)
|
||||
|
||||
select count(*) as validation_errors
|
||||
from validation_errors
|
||||
select *
|
||||
from all_values
|
||||
where value_field not in (
|
||||
{% for value in values -%}
|
||||
{% if quote -%}
|
||||
'{{ value }}'
|
||||
{%- else -%}
|
||||
{{ value }}
|
||||
{%- endif -%}
|
||||
{%- if not loop.last -%},{%- endif %}
|
||||
{%- endfor %}
|
||||
)
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% test accepted_values(model, values) %}
|
||||
|
||||
{% test accepted_values(model, column_name, values, quote=True) %}
|
||||
{% set macro = adapter.dispatch('test_accepted_values') %}
|
||||
{{ macro(model, values, **kwargs) }}
|
||||
{{ macro(model, column_name, values, quote) }}
|
||||
{% endtest %}
|
||||
|
||||
@@ -1,15 +1,13 @@
|
||||
{% macro default__test_not_null(model, column_name) %}
|
||||
|
||||
{% macro default__test_not_null(model) %}
|
||||
|
||||
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
|
||||
|
||||
select count(*) as validation_errors
|
||||
select *
|
||||
from {{ model }}
|
||||
where {{ column_name }} is null
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% test not_null(model) %}
|
||||
|
||||
{% test not_null(model, column_name) %}
|
||||
{% set macro = adapter.dispatch('test_not_null') %}
|
||||
{{ macro(model, **kwargs) }}
|
||||
{{ macro(model, column_name) }}
|
||||
{% endtest %}
|
||||
|
||||
@@ -1,24 +1,21 @@
|
||||
|
||||
{% macro default__test_relationships(model, to, field) %}
|
||||
{% macro default__test_relationships(model, column_name, to, field) %}
|
||||
|
||||
{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}
|
||||
select
|
||||
child.{{ column_name }}
|
||||
|
||||
from {{ model }} as child
|
||||
|
||||
select count(*) as validation_errors
|
||||
from (
|
||||
select {{ column_name }} as id from {{ model }}
|
||||
) as child
|
||||
left join (
|
||||
select {{ field }} as id from {{ to }}
|
||||
) as parent on parent.id = child.id
|
||||
where child.id is not null
|
||||
and parent.id is null
|
||||
left join {{ to }} as parent
|
||||
on child.{{ column_name }} = parent.{{ field }}
|
||||
|
||||
where child.{{ column_name }} is not null
|
||||
and parent.{{ field }} is null
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
|
||||
{% test relationships(model, to, field) %}
|
||||
{% test relationships(model, column_name, to, field) %}
|
||||
{% set macro = adapter.dispatch('test_relationships') %}
|
||||
{{ macro(model, to, field, **kwargs) }}
|
||||
{{ macro(model, column_name, to, field) }}
|
||||
{% endtest %}
|
||||
|
||||
@@ -1,25 +1,18 @@
|
||||
{% macro default__test_unique(model, column_name) %}
|
||||
|
||||
{% macro default__test_unique(model) %}
|
||||
select
|
||||
{{ column_name }},
|
||||
count(*) as n_records
|
||||
|
||||
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
|
||||
|
||||
select count(*) as validation_errors
|
||||
from (
|
||||
|
||||
select
|
||||
{{ column_name }}
|
||||
|
||||
from {{ model }}
|
||||
where {{ column_name }} is not null
|
||||
group by {{ column_name }}
|
||||
having count(*) > 1
|
||||
|
||||
) validation_errors
|
||||
from {{ model }}
|
||||
where {{ column_name }} is not null
|
||||
group by {{ column_name }}
|
||||
having count(*) > 1
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% test unique(model) %}
|
||||
{% test unique(model, column_name) %}
|
||||
{% set macro = adapter.dispatch('test_unique') %}
|
||||
{{ macro(model, **kwargs) }}
|
||||
{{ macro(model, column_name) }}
|
||||
{% endtest %}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -719,6 +719,13 @@ def _build_test_subparser(subparsers, base_subparser):
        Stop execution upon a first test failure.
        '''
    )
    sub.add_argument(
        '--store-failures',
        action='store_true',
        help='''
        Store test results (failing rows) in the database
        '''
    )

    sub.set_defaults(cls=test_task.TestTask, which='test', rpc_method='test')
    return sub
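A minimal standalone argparse sketch mirroring the flag added above (illustrative only, not dbt's actual parser wiring):

import argparse

parser = argparse.ArgumentParser(prog='dbt-test-sketch')
parser.add_argument(
    '--store-failures',
    action='store_true',
    help='Store test results (failing rows) in the database',
)

# store_true flags default to False and flip to True when passed.
print(parser.parse_args([]).store_failures)                    # False
print(parser.parse_args(['--store-failures']).store_failures)  # True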
@@ -998,12 +1005,23 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
# if set, extract all models and blocks with the jinja block extractor, and
|
||||
# verify that we don't fail anywhere the actual jinja parser passes. The
|
||||
# reverse (passing files that ends up failing jinja) is fine.
|
||||
# TODO remove?
|
||||
p.add_argument(
|
||||
'--test-new-parser',
|
||||
action='store_true',
|
||||
help=argparse.SUPPRESS
|
||||
)
|
||||
|
||||
# if set, will use the tree-sitter-jinja2 parser and extractor instead of
|
||||
# jinja rendering when possible.
|
||||
p.add_argument(
|
||||
'--use-experimental-parser',
|
||||
action='store_true',
|
||||
help='''
|
||||
Uses an experimental parser to extract jinja values.
|
||||
'''
|
||||
)
|
||||
|
||||
subs = p.add_subparsers(title="Available sub-commands")
|
||||
|
||||
base_subparser = _build_base_subparser()
|
||||
@@ -1051,7 +1069,7 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
parsed = p.parse_args(args)
|
||||
|
||||
if hasattr(parsed, 'profiles_dir'):
|
||||
parsed.profiles_dir = os.path.expanduser(parsed.profiles_dir)
|
||||
parsed.profiles_dir = os.path.abspath(parsed.profiles_dir)
|
||||
|
||||
if getattr(parsed, 'project_dir', None) is not None:
|
||||
expanded_user = os.path.expanduser(parsed.project_dir)
|
||||
|
||||
@@ -2,7 +2,7 @@ import abc
import itertools
import os
from typing import (
    List, Dict, Any, Generic, TypeVar
    List, Dict, Any, Generic, Optional, TypeVar
)

from dbt.dataclass_schema import ValidationError
@@ -55,11 +55,25 @@ class BaseParser(Generic[FinalValue]):
    def resource_type(self) -> NodeType:
        pass

    def generate_unique_id(self, resource_name: str) -> str:
        """Returns a unique identifier for a resource"""
        return "{}.{}.{}".format(self.resource_type,
                                 self.project.project_name,
                                 resource_name)
    def generate_unique_id(
        self,
        resource_name: str,
        hash: Optional[str] = None
    ) -> str:
        """Returns a unique identifier for a resource
        An optional hash may be passed in to ensure uniqueness for edge cases"""

        return '.'.join(
            filter(
                None,
                [
                    self.resource_type,
                    self.project.project_name,
                    resource_name,
                    hash
                ]
            )
        )


class Parser(BaseParser[FinalValue], Generic[FinalValue]):

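The filter(None, ...) in the rewritten generate_unique_id above simply drops the optional hash segment when it is not provided. A quick standalone illustration (hypothetical values):

def make_unique_id(resource_type, project_name, resource_name, hash=None):
    # filter(None, ...) removes the hash segment when it is None.
    return '.'.join(filter(None, [resource_type, project_name, resource_name, hash]))


print(make_unique_id('test', 'jaffle_shop', 'not_null_orders_id'))
# test.jaffle_shop.not_null_orders_id
print(make_unique_id('test', 'jaffle_shop', 'not_null_orders_id', 'a1b2c3'))
# test.jaffle_shop.not_null_orders_id.a1b2c3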
@@ -1,8 +1,9 @@
|
||||
from typing import Iterable
|
||||
from typing import Iterable, Optional
|
||||
|
||||
import re
|
||||
|
||||
from dbt.clients.jinja import get_rendered
|
||||
from dbt.contracts.files import SourceFile
|
||||
from dbt.contracts.graph.parsed import ParsedDocumentation
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import Parser
|
||||
@@ -23,7 +24,7 @@ class DocumentationParser(Parser[ParsedDocumentation]):
|
||||
def get_compiled_path(cls, block: FileBlock):
|
||||
return block.path.relative_path
|
||||
|
||||
def generate_unique_id(self, resource_name: str) -> str:
|
||||
def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str:
|
||||
# because docs are in their own graph namespace, node type doesn't
|
||||
# need to be part of the unique ID.
|
||||
return '{}.{}'.format(self.project.project_name, resource_name)
|
||||
@@ -46,6 +47,7 @@ class DocumentationParser(Parser[ParsedDocumentation]):
|
||||
return [doc]
|
||||
|
||||
def parse_file(self, file_block: FileBlock):
|
||||
assert isinstance(file_block.file, SourceFile)
|
||||
searcher: Iterable[BlockContents] = BlockSearcher(
|
||||
source=[file_block],
|
||||
allowed_blocks={'docs'},
|
||||
|
||||
@@ -5,7 +5,7 @@ import jinja2
|
||||
from dbt.clients import jinja
|
||||
from dbt.contracts.graph.unparsed import UnparsedMacro
|
||||
from dbt.contracts.graph.parsed import ParsedMacro
|
||||
from dbt.contracts.files import FilePath
|
||||
from dbt.contracts.files import FilePath, SourceFile
|
||||
from dbt.exceptions import CompilationException
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.node_types import NodeType
|
||||
@@ -92,12 +92,10 @@ class MacroParser(BaseParser[ParsedMacro]):
|
||||
yield node
|
||||
|
||||
def parse_file(self, block: FileBlock):
|
||||
# mark the file as seen, even if there are no macros in it
|
||||
self.manifest.get_file(block.file)
|
||||
assert isinstance(block.file, SourceFile)
|
||||
source_file = block.file
|
||||
|
||||
assert isinstance(source_file.contents, str)
|
||||
original_file_path = source_file.path.original_file_path
|
||||
|
||||
logger.debug("Parsing {}".format(original_file_path))
|
||||
|
||||
# this is really only used for error messages
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
import os
|
||||
import pickle
|
||||
from typing import (
|
||||
Dict, Optional, Mapping, Callable, Any, List, Type, Union
|
||||
)
|
||||
@@ -19,14 +18,17 @@ from dbt.adapters.factory import (
|
||||
from dbt.helper_types import PathSet
|
||||
from dbt.logger import GLOBAL_LOGGER as logger, DbtProcessState
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.clients.jinja import get_rendered, statically_extract_macro_calls
|
||||
from dbt.clients.jinja import get_rendered, MacroStack
|
||||
from dbt.clients.jinja_static import statically_extract_macro_calls
|
||||
from dbt.clients.system import make_directory
|
||||
from dbt.config import Project, RuntimeConfig
|
||||
from dbt.context.docs import generate_runtime_docs
|
||||
from dbt.context.macro_resolver import MacroResolver
|
||||
from dbt.context.base import generate_base_context
|
||||
from dbt.contracts.files import FileHash, ParseFileType
|
||||
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
|
||||
from dbt.context.configured import generate_macro_context
|
||||
from dbt.context.providers import ParseProvider
|
||||
from dbt.contracts.files import FileHash, ParseFileType, SchemaSourceFile
|
||||
from dbt.parser.read_files import read_files, load_source_file
|
||||
from dbt.parser.partial import PartialParsing
|
||||
from dbt.contracts.graph.compiled import ManifestNode
|
||||
from dbt.contracts.graph.manifest import (
|
||||
Manifest, Disabled, MacroManifest, ManifestStateCheck
|
||||
@@ -42,7 +44,7 @@ from dbt.exceptions import (
|
||||
get_source_not_found_or_disabled_msg,
|
||||
warn_or_error,
|
||||
)
|
||||
from dbt.parser.base import BaseParser, Parser
|
||||
from dbt.parser.base import Parser
|
||||
from dbt.parser.analysis import AnalysisParser
|
||||
from dbt.parser.data_test import DataTestParser
|
||||
from dbt.parser.docs import DocumentationParser
|
||||
@@ -53,13 +55,13 @@ from dbt.parser.schemas import SchemaParser
|
||||
from dbt.parser.search import FileBlock
|
||||
from dbt.parser.seeds import SeedParser
|
||||
from dbt.parser.snapshots import SnapshotParser
|
||||
from dbt.parser.sources import patch_sources
|
||||
from dbt.parser.sources import SourcePatcher
|
||||
from dbt.ui import warning_tag
|
||||
from dbt.version import __version__
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle'
|
||||
PARTIAL_PARSE_FILE_NAME = 'partial_parse.msgpack'
|
||||
PARSING_STATE = DbtProcessState('parsing')
|
||||
DEFAULT_PARTIAL_PARSE = False
|
||||
|
||||
@@ -77,7 +79,7 @@ class ParserInfo(dbtClassMixin):
|
||||
class ProjectLoaderInfo(dbtClassMixin):
|
||||
project_name: str
|
||||
elapsed: float
|
||||
parsers: List[ParserInfo]
|
||||
parsers: List[ParserInfo] = field(default_factory=list)
|
||||
path_count: int = 0
|
||||
|
||||
|
||||
@@ -93,6 +95,11 @@ class ManifestLoaderInfo(dbtClassMixin, Writable):
|
||||
process_manifest_elapsed: Optional[float] = None
|
||||
load_all_elapsed: Optional[float] = None
|
||||
projects: List[ProjectLoaderInfo] = field(default_factory=list)
|
||||
_project_index: Dict[str, ProjectLoaderInfo] = field(default_factory=dict)
|
||||
|
||||
def __post_serialize__(self, dct):
|
||||
del dct['_project_index']
|
||||
return dct
|
||||
|
||||
|
||||
# The ManifestLoader loads the manifest. The standard way to use the
|
||||
@@ -107,8 +114,9 @@ class ManifestLoader:
|
||||
) -> None:
|
||||
self.root_project: RuntimeConfig = root_project
|
||||
self.all_projects: Mapping[str, Project] = all_projects
|
||||
self.manifest: Manifest = Manifest({}, {}, {}, {}, {}, {}, [], {})
|
||||
self.manifest: Manifest = Manifest()
|
||||
self.manifest.metadata = root_project.get_metadata()
|
||||
self.started_at = int(time.time())
|
||||
# This is a MacroQueryStringSetter callable, which is called
|
||||
# later after we set the MacroManifest in the adapter. It sets
|
||||
# up the query headers.
|
||||
@@ -118,14 +126,19 @@ class ManifestLoader:
|
||||
else:
|
||||
self.macro_hook = macro_hook
|
||||
|
||||
self._perf_info = ManifestLoaderInfo(
|
||||
is_partial_parse_enabled=self._partial_parse_enabled()
|
||||
)
|
||||
# State check determines whether the old_manifest and the current
|
||||
self._perf_info = self.build_perf_info()
|
||||
|
||||
# State check determines whether the saved_manifest and the current
|
||||
# manifest match well enough to do partial parsing
|
||||
self.manifest.state_check = self.build_manifest_state_check()
|
||||
# We need to know if we're actually partially parsing. It could
|
||||
# have been enabled, but not happening because of some issue.
|
||||
self.partially_parsing = False
|
||||
|
||||
self._perf_info = self.build_perf_info()
|
||||
|
||||
# This is a saved manifest from a previous run that's used for partial parsing
|
||||
self.old_manifest: Optional[Manifest] = self.read_saved_manifest()
|
||||
self.saved_manifest: Optional[Manifest] = self.read_manifest_for_partial_parse()
|
||||
|
||||
# This is the method that builds a complete manifest. We sometimes
|
||||
# use an abbreviated process in tests.
|
||||
@@ -151,12 +164,8 @@ class ManifestLoader:
|
||||
|
||||
projects = config.load_dependencies()
|
||||
loader = ManifestLoader(config, projects, macro_hook)
|
||||
loader.load()
|
||||
|
||||
# The goal is to move partial parse writing to after update_manifest
|
||||
loader.write_manifest_for_partial_parse()
|
||||
manifest = loader.update_manifest()
|
||||
# Move write_manifest_for_partial_parse here
|
||||
manifest = loader.load()
|
||||
|
||||
_check_manifest(manifest, config)
|
||||
manifest.build_flat_graph()
|
||||
@@ -176,9 +185,6 @@ class ManifestLoader:
|
||||
# This is where the main action happens
|
||||
def load(self):
|
||||
|
||||
if self.old_manifest is not None:
|
||||
logger.debug('Got an acceptable saved parse result')
|
||||
|
||||
# Read files creates a dictionary of projects to a dictionary
|
||||
# of parsers to lists of file strings. The file strings are
|
||||
# used to get the SourceFiles from the manifest files.
|
||||
@@ -192,43 +198,132 @@ class ManifestLoader:
|
||||
read_files(project, self.manifest.files, project_parser_files)
|
||||
self._perf_info.read_files_elapsed = (time.perf_counter() - start_read_files)
|
||||
|
||||
# We need to parse the macros first, so they're resolvable when
|
||||
# the other files are loaded
|
||||
start_load_macros = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
parser = MacroParser(project, self.manifest)
|
||||
parser_files = project_parser_files[project.project_name]
|
||||
for search_key in parser_files['MacroParser']:
|
||||
block = FileBlock(self.manifest.files[search_key])
|
||||
self.parse_with_cache(block, parser)
|
||||
self.reparse_macros()
|
||||
# This is where a loop over self.manifest.macros should be performed
|
||||
# to set the 'depends_on' information from static rendering.
|
||||
self._perf_info.load_macros_elapsed = (time.perf_counter() - start_load_macros)
|
||||
skip_parsing = False
|
||||
if self.saved_manifest is not None:
|
||||
partial_parsing = PartialParsing(self.saved_manifest, self.manifest.files)
|
||||
skip_parsing = partial_parsing.skip_parsing()
|
||||
if not skip_parsing:
|
||||
# create child_map and parent_map
|
||||
self.saved_manifest.build_parent_and_child_maps()
|
||||
# files are different, we need to create a new set of
|
||||
# project_parser_files.
|
||||
project_parser_files = partial_parsing.get_parsing_files()
|
||||
self.manifest = self.saved_manifest
|
||||
self.partially_parsing = True
|
||||
|
||||
# Now that the macros are parsed, parse the rest of the files.
|
||||
# This is currently done on a per project basis,
|
||||
# but in the future we may change that
|
||||
start_parse_projects = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
self.parse_project(project, project_parser_files[project.project_name])
|
||||
self._perf_info.parse_project_elapsed = (time.perf_counter() - start_parse_projects)
|
||||
if skip_parsing:
|
||||
logger.info("Partial parsing enabled, no changes found, skipping parsing")
|
||||
self.manifest = self.saved_manifest
|
||||
|
||||
# Parse every file in this project, except macros (already done)
|
||||
else:
|
||||
# Load Macros
|
||||
# We need to parse the macros first, so they're resolvable when
|
||||
# the other files are loaded
|
||||
start_load_macros = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
if project.project_name not in project_parser_files:
|
||||
continue
|
||||
parser_files = project_parser_files[project.project_name]
|
||||
if 'MacroParser' not in parser_files:
|
||||
continue
|
||||
parser = MacroParser(project, self.manifest)
|
||||
for file_id in parser_files['MacroParser']:
|
||||
block = FileBlock(self.manifest.files[file_id])
|
||||
parser.parse_file(block)
|
||||
# Look at changed macros and update the macro.depends_on.macros
|
||||
self.macro_depends_on()
|
||||
self._perf_info.load_macros_elapsed = (time.perf_counter() - start_load_macros)
|
||||
|
||||
# Now that the macros are parsed, parse the rest of the files.
|
||||
# This is currently done on a per project basis.
|
||||
start_parse_projects = time.perf_counter()
|
||||
|
||||
# Load the rest of the files except for schema yaml files
|
||||
parser_types: List[Type[Parser]] = [
|
||||
ModelParser, SnapshotParser, AnalysisParser, DataTestParser,
|
||||
SeedParser, DocumentationParser, HookParser]
|
||||
for project in self.all_projects.values():
|
||||
if project.project_name not in project_parser_files:
|
||||
continue
|
||||
self.parse_project(
|
||||
project,
|
||||
project_parser_files[project.project_name],
|
||||
parser_types
|
||||
)
|
||||
|
||||
# Now that we've loaded most of the nodes (except for schema tests and sources)
|
||||
# load up the Lookup objects to resolve them by name, so the SourceFiles store
|
||||
# the unique_id instead of the name. Sources are loaded from yaml files, so
|
||||
# aren't in place yet
|
||||
self.manifest.rebuild_ref_lookup()
|
||||
self.manifest.rebuild_doc_lookup()
|
||||
|
||||
# Load yaml files
|
||||
parser_types = [SchemaParser]
|
||||
for project in self.all_projects.values():
|
||||
if project.project_name not in project_parser_files:
|
||||
continue
|
||||
self.parse_project(
|
||||
project,
|
||||
project_parser_files[project.project_name],
|
||||
parser_types
|
||||
)
|
||||
|
||||
self._perf_info.parse_project_elapsed = (time.perf_counter() - start_parse_projects)
|
||||
|
||||
# patch_sources converts the UnparsedSourceDefinitions in the
|
||||
# Manifest.sources to ParsedSourceDefinition via 'patch_source'
|
||||
# in SourcePatcher
|
||||
start_patch = time.perf_counter()
|
||||
patcher = SourcePatcher(self.root_project, self.manifest)
|
||||
patcher.construct_sources()
|
||||
self.manifest.sources = patcher.sources
|
||||
self._perf_info.patch_sources_elapsed = (
|
||||
time.perf_counter() - start_patch
|
||||
)
|
||||
|
||||
# ParseResults had a 'disabled' attribute which was a dictionary
|
||||
# which is now named '_disabled'. This used to copy from
|
||||
# ParseResults to the Manifest.
|
||||
# TODO: normalize to only one disabled
|
||||
disabled = []
|
||||
for value in self.manifest._disabled.values():
|
||||
disabled.extend(value)
|
||||
self.manifest.disabled = disabled
|
||||
|
||||
# copy the selectors from the root_project to the manifest
|
||||
self.manifest.selectors = self.root_project.manifest_selectors
|
||||
|
||||
# update the refs, sources, and docs
|
||||
# These check the created_at time on the nodes to
|
||||
# determine whether they need processinga.
|
||||
start_process = time.perf_counter()
|
||||
self.process_sources(self.root_project.project_name)
|
||||
self.process_refs(self.root_project.project_name)
|
||||
self.process_docs(self.root_project)
|
||||
self._perf_info.process_manifest_elapsed = (
|
||||
time.perf_counter() - start_process
|
||||
)
|
||||
|
||||
# write out the fully parsed manifest
|
||||
self.write_manifest_for_partial_parse()
|
||||
|
||||
return self.manifest
|
||||
|
||||
# Parse the files in the 'parser_files' dictionary, for parsers listed in
|
||||
# 'parser_types'
|
||||
def parse_project(
|
||||
self,
|
||||
project: Project,
|
||||
parser_files
|
||||
parser_files,
|
||||
parser_types: List[Type[Parser]],
|
||||
) -> None:
|
||||
|
||||
project_parser_info: List[ParserInfo] = []
|
||||
project_loader_info = self._perf_info._project_index[project.project_name]
|
||||
start_timer = time.perf_counter()
|
||||
total_path_count = 0
|
||||
|
||||
# Loop through parsers with loaded files. Note: SchemaParser must be last
|
||||
parser_types: List[Type[Parser]] = [
|
||||
ModelParser, SnapshotParser, AnalysisParser, DataTestParser,
|
||||
SeedParser, DocumentationParser, SchemaParser]
|
||||
# Loop through parsers with loaded files.
|
||||
for parser_cls in parser_types:
|
||||
parser_name = parser_cls.__name__
|
||||
# No point in creating a parser if we don't have files for it
|
||||
@@ -241,13 +336,21 @@ class ManifestLoader:
|
||||
|
||||
# Parse the project files for this parser
|
||||
parser: Parser = parser_cls(project, self.manifest, self.root_project)
|
||||
for search_key in parser_files[parser_name]:
|
||||
block = FileBlock(self.manifest.files[search_key])
|
||||
self.parse_with_cache(block, parser)
|
||||
for file_id in parser_files[parser_name]:
|
||||
block = FileBlock(self.manifest.files[file_id])
|
||||
if isinstance(parser, SchemaParser):
|
||||
assert isinstance(block.file, SchemaSourceFile)
|
||||
if self.partially_parsing:
|
||||
dct = block.file.pp_dict
|
||||
else:
|
||||
dct = block.file.dict_from_yaml
|
||||
parser.parse_file(block, dct=dct)
|
||||
else:
|
||||
parser.parse_file(block)
|
||||
parser_path_count = parser_path_count + 1
|
||||
|
||||
# Save timing info
|
||||
project_parser_info.append(ParserInfo(
|
||||
project_loader_info.parsers.append(ParserInfo(
|
||||
parser=parser.resource_type,
|
||||
path_count=parser_path_count,
|
||||
elapsed=time.perf_counter() - parser_start_timer
|
||||
@@ -256,27 +359,28 @@ class ManifestLoader:
|
||||
|
||||
# HookParser doesn't run from loaded files, just dbt_project.yml,
|
||||
# so do separately
|
||||
hook_parser = HookParser(project, self.manifest, self.root_project)
|
||||
path = hook_parser.get_path()
|
||||
file_block = FileBlock(load_source_file(path, ParseFileType.Hook, project.project_name))
|
||||
self.parse_with_cache(file_block, hook_parser)
|
||||
# This shouldn't need to be parsed again if we're starting from
|
||||
# a saved manifest, because that won't be allowed if dbt_project.yml
|
||||
# changed, but leave for now.
|
||||
if not self.partially_parsing and HookParser in parser_types:
|
||||
hook_parser = HookParser(project, self.manifest, self.root_project)
|
||||
path = hook_parser.get_path()
|
||||
file_block = FileBlock(
|
||||
load_source_file(path, ParseFileType.Hook, project.project_name)
|
||||
)
|
||||
hook_parser.parse_file(file_block)
|
||||
|
||||
# Store the performance info
|
||||
elapsed = time.perf_counter() - start_timer
|
||||
project_info = ProjectLoaderInfo(
|
||||
project_name=project.project_name,
|
||||
path_count=total_path_count,
|
||||
elapsed=elapsed,
|
||||
parsers=project_parser_info
|
||||
)
|
||||
self._perf_info.projects.append(project_info)
|
||||
project_loader_info.path_count = project_loader_info.path_count + total_path_count
|
||||
project_loader_info.elapsed = project_loader_info.elapsed + elapsed
|
||||
self._perf_info.path_count = (
|
||||
self._perf_info.path_count + total_path_count
|
||||
)
|
||||
|
||||
# Loop through macros in the manifest and statically parse
|
||||
# the 'macro_sql' to find depends_on.macros
|
||||
def reparse_macros(self):
|
||||
def macro_depends_on(self):
|
||||
internal_package_names = get_adapter_package_names(
|
||||
self.root_project.credentials.type
|
||||
)
|
||||
@@ -285,86 +389,59 @@ class ManifestLoader:
|
||||
self.root_project.project_name,
|
||||
internal_package_names
|
||||
)
|
||||
base_ctx = generate_base_context({})
|
||||
macro_ctx = generate_macro_context(self.root_project)
|
||||
macro_namespace = TestMacroNamespace(
|
||||
macro_resolver, {}, None, MacroStack(), []
|
||||
)
|
||||
adapter = get_adapter(self.root_project)
|
||||
db_wrapper = ParseProvider().DatabaseWrapper(
|
||||
adapter, macro_namespace
|
||||
)
|
||||
for macro in self.manifest.macros.values():
|
||||
possible_macro_calls = statically_extract_macro_calls(macro.macro_sql, base_ctx)
|
||||
if macro.created_at < self.started_at:
|
||||
continue
|
||||
possible_macro_calls = statically_extract_macro_calls(
|
||||
macro.macro_sql, macro_ctx, db_wrapper)
|
||||
for macro_name in possible_macro_calls:
|
||||
# adapter.dispatch calls can generate a call with the same name as the macro
|
||||
# it ought to be an adapter prefix (postgres_) or default_
|
||||
if macro_name == macro.name:
|
||||
continue
|
||||
dep_macro_id = macro_resolver.get_macro_id(macro.package_name, macro_name)
|
||||
package_name = macro.package_name
|
||||
if '.' in macro_name:
|
||||
package_name, macro_name = macro_name.split('.')
|
||||
dep_macro_id = macro_resolver.get_macro_id(package_name, macro_name)
|
||||
if dep_macro_id:
|
||||
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
|
||||
|
||||
# This is where we use the partial-parse state from the
|
||||
# pickle file (if it exists)
|
||||
def parse_with_cache(
|
||||
self,
|
||||
block: FileBlock,
|
||||
parser: BaseParser,
|
||||
) -> None:
|
||||
# _get_cached actually copies the nodes, etc, that were
|
||||
# generated from the file to the results, in 'sanitized_update'
|
||||
if not self._get_cached(block, parser):
|
||||
parser.parse_file(block)
|
||||
|
||||
# check if we have a stored parse file, then check if
|
||||
# file checksums are the same or not and either return
|
||||
# the old ... stuff or return false (not cached)
|
||||
def _get_cached(
|
||||
self,
|
||||
block: FileBlock,
|
||||
parser: BaseParser,
|
||||
) -> bool:
|
||||
# TODO: handle multiple parsers w/ same files, by
|
||||
# tracking parser type vs node type? Or tracking actual
|
||||
# parser type during parsing?
|
||||
if self.old_manifest is None:
|
||||
return False
|
||||
# The 'has_file' method is where we check to see if
|
||||
# the checksum of the old file is the same as the new
|
||||
# file. If the checksum is different, 'has_file' returns
|
||||
# false. If it's the same, the file and the things that
|
||||
# were generated from it are used.
|
||||
if self.old_manifest.has_file(block.file):
|
||||
return self.manifest.sanitized_update(
|
||||
block.file, self.old_manifest, parser.resource_type
|
||||
)
|
||||
return False
|
||||
|
||||
def write_manifest_for_partial_parse(self):
|
||||
path = os.path.join(self.root_project.target_path,
|
||||
PARTIAL_PARSE_FILE_NAME)
|
||||
make_directory(self.root_project.target_path)
|
||||
with open(path, 'wb') as fp:
|
||||
pickle.dump(self.manifest, fp)
|
||||
try:
|
||||
manifest_msgpack = self.manifest.to_msgpack()
|
||||
make_directory(os.path.dirname(path))
|
||||
with open(path, 'wb') as fp:
|
||||
fp.write(manifest_msgpack)
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
def matching_parse_results(self, manifest: Manifest) -> bool:
|
||||
"""Compare the global hashes of the read-in parse results' values to
|
||||
the known ones, and return if it is ok to re-use the results.
|
||||
"""
|
||||
try:
|
||||
if manifest.metadata.dbt_version != __version__:
|
||||
logger.debug(
|
||||
'dbt version mismatch: {} != {}, cache invalidated'
|
||||
.format(manifest.metadata.dbt_version, __version__)
|
||||
)
|
||||
return False
|
||||
except AttributeError as exc:
|
||||
logger.debug(f"malformed result file, cache invalidated: {exc}")
|
||||
return False
|
||||
|
||||
valid = True
|
||||
|
||||
if not self.manifest.state_check or not manifest.state_check:
|
||||
return False
|
||||
|
||||
if manifest.metadata.dbt_version != __version__:
|
||||
logger.info("Unable to do partial parsing because of a dbt version mismatch")
|
||||
return False # If the version is wrong, the other checks might not work
|
||||
if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
|
||||
logger.debug('vars hash mismatch, cache invalidated')
|
||||
logger.info("Unable to do partial parsing because config vars, "
|
||||
"config profile, or config target have changed")
|
||||
valid = False
|
||||
if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
|
||||
logger.debug('profile hash mismatch, cache invalidated')
|
||||
# Note: This should be made more granular. We shouldn't need to invalidate
|
||||
# partial parsing if a non-used profile section has changed.
|
||||
logger.info("Unable to do partial parsing because profile has changed")
|
||||
valid = False
|
||||
|
||||
missing_keys = {
|
||||
@@ -372,21 +449,15 @@ class ManifestLoader:
|
||||
if k not in manifest.state_check.project_hashes
|
||||
}
|
||||
if missing_keys:
|
||||
logger.debug(
|
||||
'project hash mismatch: values missing, cache invalidated: {}'
|
||||
.format(missing_keys)
|
||||
)
|
||||
logger.info("Unable to do partial parsing because a project dependency has been added")
|
||||
valid = False
|
||||
|
||||
for key, new_value in self.manifest.state_check.project_hashes.items():
|
||||
if key in manifest.state_check.project_hashes:
|
||||
old_value = manifest.state_check.project_hashes[key]
|
||||
if new_value != old_value:
|
||||
logger.debug(
|
||||
'For key {}, hash mismatch ({} -> {}), cache '
|
||||
'invalidated'
|
||||
.format(key, old_value, new_value)
|
||||
)
|
||||
logger.info("Unable to do partial parsing because "
|
||||
"a project config has changed")
|
||||
valid = False
|
||||
return valid
|
||||
|
||||
@@ -400,7 +471,7 @@ class ManifestLoader:
|
||||
else:
|
||||
return DEFAULT_PARTIAL_PARSE
|
||||
|
||||
def read_saved_manifest(self) -> Optional[Manifest]:
|
||||
def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
|
||||
if not self._partial_parse_enabled():
|
||||
logger.debug('Partial parsing not enabled')
|
||||
return None
|
||||
@@ -410,7 +481,8 @@ class ManifestLoader:
|
||||
if os.path.exists(path):
|
||||
try:
|
||||
with open(path, 'rb') as fp:
|
||||
manifest: Manifest = pickle.load(fp)
|
||||
manifest_mp = fp.read()
|
||||
manifest: Manifest = Manifest.from_msgpack(manifest_mp) # type: ignore
|
||||
# keep this check inside the try/except in case something about
|
||||
# the file has changed in weird ways, perhaps due to being a
|
||||
# different version of dbt
|
||||
@@ -422,50 +494,24 @@ class ManifestLoader:
|
||||
.format(path, exc),
|
||||
exc_info=True
|
||||
)
|
||||
else:
|
||||
logger.info(f"Unable to do partial parsing because {path} not found")
|
||||
|
||||
return None
|
||||
|
||||
# This find the sources, refs, and docs and resolves them
|
||||
# for nodes and exposures
|
||||
def process_manifest(self):
|
||||
project_name = self.root_project.project_name
|
||||
process_sources(self.manifest, project_name)
|
||||
process_refs(self.manifest, project_name)
|
||||
process_docs(self.manifest, self.root_project)
|
||||
|
||||
def update_manifest(self) -> Manifest:
|
||||
start_patch = time.perf_counter()
|
||||
# patch_sources converts the UnparsedSourceDefinitions in the
|
||||
# Manifest.sources to ParsedSourceDefinition via 'patch_source'
|
||||
# in SourcePatcher
|
||||
sources = patch_sources(self.root_project, self.manifest)
|
||||
self.manifest.sources = sources
|
||||
# ParseResults had a 'disabled' attribute which was a dictionary
|
||||
# which is now named '_disabled'. This used to copy from
|
||||
# ParseResults to the Manifest. Can this be normalized so
|
||||
# there's only one disabled?
|
||||
disabled = []
|
||||
for value in self.manifest._disabled.values():
|
||||
disabled.extend(value)
|
||||
self.manifest.disabled = disabled
|
||||
self._perf_info.patch_sources_elapsed = (
|
||||
time.perf_counter() - start_patch
|
||||
def build_perf_info(self):
|
||||
mli = ManifestLoaderInfo(
|
||||
is_partial_parse_enabled=self._partial_parse_enabled()
|
||||
)
|
||||
|
||||
self.manifest.selectors = self.root_project.manifest_selectors
|
||||
|
||||
# do the node and macro patches
|
||||
self.manifest.patch_nodes()
|
||||
self.manifest.patch_macros()
|
||||
|
||||
# process_manifest updates the refs, sources, and docs
|
||||
start_process = time.perf_counter()
|
||||
self.process_manifest()
|
||||
|
||||
self._perf_info.process_manifest_elapsed = (
|
||||
time.perf_counter() - start_process
|
||||
)
|
||||
|
||||
return self.manifest
|
||||
for project in self.all_projects.values():
|
||||
project_info = ProjectLoaderInfo(
|
||||
project_name=project.project_name,
|
||||
path_count=0,
|
||||
elapsed=0,
|
||||
)
|
||||
mli.projects.append(project_info)
|
||||
mli._project_index[project.project_name] = project_info
|
||||
return mli
|
||||
|
||||
# TODO: this should be calculated per-file based on the vars() calls made in
|
||||
# parsing, so changing one var doesn't invalidate everything. also there should
|
||||
@@ -523,7 +569,7 @@ class ManifestLoader:
|
||||
block = FileBlock(source_file)
|
||||
# This does not add the file to the manifest.files,
|
||||
# but that shouldn't be necessary here.
|
||||
self.parse_with_cache(block, macro_parser)
|
||||
macro_parser.parse_file(block)
|
||||
macro_manifest = MacroManifest(self.manifest.macros)
|
||||
return macro_manifest
|
||||
|
||||
@@ -570,6 +616,80 @@ class ManifestLoader:
|
||||
),
|
||||
})
|
||||
|
||||
# Takes references in 'refs' array of nodes and exposures, finds the target
|
||||
# node, and updates 'depends_on.nodes' with the unique id
|
||||
def process_refs(self, current_project: str):
|
||||
for node in self.manifest.nodes.values():
|
||||
if node.created_at < self.started_at:
|
||||
continue
|
||||
_process_refs_for_node(self.manifest, current_project, node)
|
||||
for exposure in self.manifest.exposures.values():
|
||||
if exposure.created_at < self.started_at:
|
||||
continue
|
||||
_process_refs_for_exposure(self.manifest, current_project, exposure)
|
||||
|
||||
# nodes: node and column descriptions
|
||||
# sources: source and table descriptions, column descriptions
|
||||
# macros: macro argument descriptions
|
||||
# exposures: exposure descriptions
|
||||
def process_docs(self, config: RuntimeConfig):
|
||||
for node in self.manifest.nodes.values():
|
||||
if node.created_at < self.started_at:
|
||||
continue
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
node,
|
||||
self.manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_node(ctx, node)
|
||||
for source in self.manifest.sources.values():
|
||||
if source.created_at < self.started_at:
|
||||
continue
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
source,
|
||||
self.manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_source(ctx, source)
|
||||
for macro in self.manifest.macros.values():
|
||||
if macro.created_at < self.started_at:
|
||||
continue
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
macro,
|
||||
self.manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_macro(ctx, macro)
|
||||
for exposure in self.manifest.exposures.values():
|
||||
if exposure.created_at < self.started_at:
|
||||
continue
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
exposure,
|
||||
self.manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_exposure(ctx, exposure)
|
||||
|
||||
# Loops through all nodes and exposures, for each element in
|
||||
# 'sources' array finds the source node and updates the
|
||||
# 'depends_on.nodes' array with the unique id
|
||||
def process_sources(self, current_project: str):
|
||||
for node in self.manifest.nodes.values():
|
||||
if node.resource_type == NodeType.Source:
|
||||
continue
|
||||
assert not isinstance(node, ParsedSourceDefinition)
|
||||
if node.created_at < self.started_at:
|
||||
continue
|
||||
_process_sources_for_node(self.manifest, current_project, node)
|
||||
for exposure in self.manifest.exposures.values():
|
||||
if exposure.created_at < self.started_at:
|
||||
continue
|
||||
_process_sources_for_exposure(self.manifest, current_project, exposure)
|
||||
|
||||
|
||||
def invalid_ref_fail_unless_test(node, target_model_name,
|
||||
target_model_package, disabled):
|
||||
@@ -625,7 +745,7 @@ def _check_resource_uniqueness(
|
||||
alias_resources: Dict[str, ManifestNode] = {}
|
||||
|
||||
for resource, node in manifest.nodes.items():
|
||||
if node.resource_type not in NodeType.refable():
|
||||
if not node.is_relational:
|
||||
continue
|
||||
# appease mypy - sources aren't refable!
|
||||
assert not isinstance(node, ParsedSourceDefinition)
|
||||
@@ -743,45 +863,6 @@ def _process_docs_for_exposure(
|
||||
exposure.description = get_rendered(exposure.description, context)
|
||||
|
||||
|
||||
# nodes: node and column descriptions
|
||||
# sources: source and table descriptions, column descriptions
|
||||
# macros: macro argument descriptions
|
||||
# exposures: exposure descriptions
|
||||
def process_docs(manifest: Manifest, config: RuntimeConfig):
|
||||
for node in manifest.nodes.values():
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
node,
|
||||
manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_node(ctx, node)
|
||||
for source in manifest.sources.values():
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
source,
|
||||
manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_source(ctx, source)
|
||||
for macro in manifest.macros.values():
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
macro,
|
||||
manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_macro(ctx, macro)
|
||||
for exposure in manifest.exposures.values():
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
exposure,
|
||||
manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_exposure(ctx, exposure)
|
||||
|
||||
|
||||
def _process_refs_for_exposure(
|
||||
manifest: Manifest, current_project: str, exposure: ParsedExposure
|
||||
):
|
||||
@@ -869,16 +950,6 @@ def _process_refs_for_node(
|
||||
manifest.update_node(node)
|
||||
|
||||
|
||||
# Takes references in 'refs' array of nodes and exposures, finds the target
|
||||
# node, and updates 'depends_on.nodes' with the unique id
|
||||
def process_refs(manifest: Manifest, current_project: str):
|
||||
for node in manifest.nodes.values():
|
||||
_process_refs_for_node(manifest, current_project, node)
|
||||
for exposure in manifest.exposures.values():
|
||||
_process_refs_for_exposure(manifest, current_project, exposure)
|
||||
return manifest
|
||||
|
||||
|
||||
def _process_sources_for_exposure(
|
||||
manifest: Manifest, current_project: str, exposure: ParsedExposure
|
||||
):
|
||||
@@ -930,20 +1001,6 @@ def _process_sources_for_node(
|
||||
manifest.update_node(node)
|
||||
|
||||
|
||||
# Loops through all nodes and exposures, for each element in
|
||||
# 'sources' array finds the source node and updates the
|
||||
# 'depends_on.nodes' array with the unique id
|
||||
def process_sources(manifest: Manifest, current_project: str):
|
||||
for node in manifest.nodes.values():
|
||||
if node.resource_type == NodeType.Source:
|
||||
continue
|
||||
assert not isinstance(node, ParsedSourceDefinition)
|
||||
_process_sources_for_node(manifest, current_project, node)
|
||||
for exposure in manifest.exposures.values():
|
||||
_process_sources_for_exposure(manifest, current_project, exposure)
|
||||
return manifest
|
||||
|
||||
|
||||
# This is called in task.rpc.sql_commands when a "dynamic" node is
|
||||
# created in the manifest, in 'add_refs'
|
||||
def process_macro(
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from dbt.context.context_config import ContextConfig
|
||||
from dbt.contracts.graph.parsed import ParsedModelNode
|
||||
import dbt.flags as flags
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SimpleSQLParser
|
||||
from dbt.parser.base import IntermediateNode, SimpleSQLParser
|
||||
from dbt.parser.search import FileBlock
|
||||
from dbt.tree_sitter_jinja.extractor import extract_from_source
|
||||
|
||||
|
||||
class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
@@ -17,3 +20,48 @@ class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
@classmethod
|
||||
def get_compiled_path(cls, block: FileBlock):
|
||||
return block.path.relative_path
|
||||
|
||||
def render_update(
|
||||
self, node: IntermediateNode, config: ContextConfig
|
||||
) -> None:
|
||||
|
||||
# normal dbt run
|
||||
if not flags.USE_EXPERIMENTAL_PARSER:
|
||||
super().render_update(node, config)
|
||||
|
||||
# if the --use-experimental-parser flag was set
|
||||
else:
|
||||
|
||||
# run dbt-jinja extractor (powered by tree-sitter)
|
||||
res = extract_from_source(node.raw_sql)
|
||||
|
||||
# if it doesn't need python jinja, fit the refs, sources, and configs
|
||||
# into the node. Down the line the rest of the node will be updated with
|
||||
# this information. (e.g. depends_on etc.)
|
||||
if not res['python_jinja']:
|
||||
|
||||
config_calls = []
|
||||
for c in res['configs']:
|
||||
config_calls.append({c[0]: c[1]})
|
||||
|
||||
config._config_calls = config_calls
|
||||
|
||||
# this uses the updated config to set all the right things in the node
|
||||
# if there are hooks present, it WILL render jinja. Will need to change
|
||||
# when we support hooks
|
||||
self.update_parsed_node(node, config)
|
||||
|
||||
# udpate the unrendered config with values from the file
|
||||
# values from yaml files are in there already
|
||||
node.unrendered_config.update(dict(res['configs']))
|
||||
|
||||
# set refs, sources, and configs on the node object
|
||||
node.refs = node.refs + res['refs']
|
||||
for sourcev in res['sources']:
|
||||
# TODO change extractor to match type here
|
||||
node.sources.append([sourcev[0], sourcev[1]])
|
||||
for configv in res['configs']:
|
||||
node.config[configv[0]] = configv[1]
|
||||
|
||||
else:
|
||||
super().render_update(node, config)
|
||||
|
||||
680
core/dbt/parser/partial.py
Normal file
680
core/dbt/parser/partial.py
Normal file
@@ -0,0 +1,680 @@
|
||||
from typing import MutableMapping, Dict
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.files import (
|
||||
AnySourceFile, ParseFileType, parse_file_type_to_parser,
|
||||
)
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
|
||||
mssat_files = (
|
||||
ParseFileType.Model,
|
||||
ParseFileType.Seed,
|
||||
ParseFileType.Snapshot,
|
||||
ParseFileType.Analysis,
|
||||
ParseFileType.Test,
|
||||
)
|
||||
|
||||
|
||||
key_to_prefix = {
|
||||
'models': 'model',
|
||||
'seeds': 'seed',
|
||||
'snapshots': 'snapshot',
|
||||
'analyses': 'analysis',
|
||||
}
|
||||
|
||||
|
||||
parse_file_type_to_key = {
|
||||
ParseFileType.Model: 'models',
|
||||
ParseFileType.Seed: 'seeds',
|
||||
ParseFileType.Snapshot: 'snapshots',
|
||||
ParseFileType.Analysis: 'analyses',
|
||||
}
|
||||
|
||||
|
||||
# Partial parsing. Create a diff of files from saved manifest and current
|
||||
# files and produce a project_parser_file dictionary to drive parsing of
|
||||
# only the necessary changes.
|
||||
# Will produce a 'skip_parsing' method, and a project_parser_file dictionary
|
||||
class PartialParsing:
|
||||
def __init__(self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]):
|
||||
self.saved_manifest = saved_manifest
|
||||
self.new_files = new_files
|
||||
self.project_parser_files: Dict = {}
|
||||
self.saved_files = self.saved_manifest.files
|
||||
self.project_parser_files = {}
|
||||
self.deleted_manifest = Manifest()
|
||||
self.build_file_diff()
|
||||
|
||||
def skip_parsing(self):
|
||||
return (
|
||||
not self.file_diff['deleted'] and
|
||||
not self.file_diff['added'] and
|
||||
not self.file_diff['changed'] and
|
||||
not self.file_diff['changed_schema_files'] and
|
||||
not self.file_diff['deleted_schema_files']
|
||||
)
|
||||
|
||||
# Compare the previously saved manifest files and the just-loaded manifest
|
||||
# files to see if anything changed
|
||||
def build_file_diff(self):
|
||||
saved_file_ids = set(self.saved_files.keys())
|
||||
new_file_ids = set(self.new_files.keys())
|
||||
deleted_all_files = saved_file_ids.difference(new_file_ids)
|
||||
added = new_file_ids.difference(saved_file_ids)
|
||||
common = saved_file_ids.intersection(new_file_ids)
|
||||
|
||||
# separate out deleted schema files
|
||||
deleted_schema_files = []
|
||||
deleted = []
|
||||
for file_id in deleted_all_files:
|
||||
if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
|
||||
deleted_schema_files.append(file_id)
|
||||
else:
|
||||
deleted.append(file_id)
|
||||
|
||||
changed = []
|
||||
changed_schema_files = []
|
||||
unchanged = []
|
||||
for file_id in common:
|
||||
if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
|
||||
unchanged.append(file_id)
|
||||
else:
|
||||
# separate out changed schema files
|
||||
if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
|
||||
sf = self.saved_files[file_id]
|
||||
if type(sf).__name__ != 'SchemaSourceFile':
|
||||
raise Exception(f"Serialization failure for {file_id}")
|
||||
changed_schema_files.append(file_id)
|
||||
else:
|
||||
changed.append(file_id)
|
||||
file_diff = {
|
||||
"deleted": deleted,
|
||||
"deleted_schema_files": deleted_schema_files,
|
||||
"added": added,
|
||||
"changed": changed,
|
||||
"changed_schema_files": changed_schema_files,
|
||||
"unchanged": unchanged,
|
||||
}
|
||||
logger.info(f"Partial parsing enabled: "
|
||||
f"{len(deleted) + len(deleted_schema_files)} files deleted, "
|
||||
f"{len(added)} files added, "
|
||||
f"{len(changed) + len(changed_schema_files)} files changed.")
|
||||
self.file_diff = file_diff
|
||||
|
||||
# generate the list of files that need parsing
|
||||
# uses self.manifest.files generated by 'read_files'
|
||||
def get_parsing_files(self):
|
||||
if self.skip_parsing():
|
||||
return {}
|
||||
# Need to add new files first, because changes in schema files
|
||||
# might refer to them
|
||||
for file_id in self.file_diff['added']:
|
||||
self.add_to_saved(file_id)
|
||||
# Need to process schema files next, because the dictionaries
|
||||
# need to be in place for handling SQL file changes
|
||||
for file_id in self.file_diff['changed_schema_files']:
|
||||
self.change_schema_file(file_id)
|
||||
for file_id in self.file_diff['deleted_schema_files']:
|
||||
self.delete_schema_file(file_id)
|
||||
for file_id in self.file_diff['deleted']:
|
||||
self.delete_from_saved(file_id)
|
||||
for file_id in self.file_diff['changed']:
|
||||
self.update_in_saved(file_id)
|
||||
return self.project_parser_files
|
||||
|
||||
# Add the file to the project parser dictionaries to schedule parsing
|
||||
def add_to_pp_files(self, source_file):
|
||||
file_id = source_file.file_id
|
||||
parser_name = parse_file_type_to_parser[source_file.parse_file_type]
|
||||
project_name = source_file.project_name
|
||||
if not parser_name or not project_name:
|
||||
raise Exception(f"Did not find parse_file_type or project_name "
|
||||
f"in SourceFile for {source_file.file_id}")
|
||||
if project_name not in self.project_parser_files:
|
||||
self.project_parser_files[project_name] = {}
|
||||
if parser_name not in self.project_parser_files[project_name]:
|
||||
self.project_parser_files[project_name][parser_name] = []
|
||||
if (file_id not in self.project_parser_files[project_name][parser_name] and
|
||||
file_id not in self.file_diff['deleted']):
|
||||
self.project_parser_files[project_name][parser_name].append(file_id)
|
||||
|
||||
# Add new files, including schema files
|
||||
def add_to_saved(self, file_id):
|
||||
# add file object to saved manifest.files
|
||||
source_file = self.new_files[file_id]
|
||||
if source_file.parse_file_type == ParseFileType.Schema:
|
||||
self.handle_added_schema_file(source_file)
|
||||
self.saved_files[file_id] = source_file
|
||||
# update pp_files to parse
|
||||
self.add_to_pp_files(source_file)
|
||||
logger.debug(f"Partial parsing: added file: {file_id}")
|
||||
|
||||
def handle_added_schema_file(self, source_file):
|
||||
source_file.pp_dict = source_file.dict_from_yaml.copy()
|
||||
if 'sources' in source_file.pp_dict:
|
||||
for source in source_file.pp_dict['sources']:
|
||||
# We need to remove the original source, so it can
|
||||
# be properly patched
|
||||
if 'overrides' in source:
|
||||
self.remove_source_override_target(source)
|
||||
|
||||
# Deletes for all non-schema files
|
||||
def delete_from_saved(self, file_id):
|
||||
# Look at all things touched by file, remove those
|
||||
# nodes, and update pp_files to parse unless the
|
||||
# file creating those nodes has also been deleted
|
||||
saved_source_file = self.saved_files[file_id]
|
||||
|
||||
# SQL file: models, seeds, snapshots, analyses, tests: SQL files, except
|
||||
# macros/tests
|
||||
if saved_source_file.parse_file_type in mssat_files:
|
||||
self.remove_mssat_file(saved_source_file)
|
||||
self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)
|
||||
|
||||
# macros
|
||||
if saved_source_file.parse_file_type == ParseFileType.Macro:
|
||||
self.delete_macro_file(saved_source_file)
|
||||
|
||||
# docs
|
||||
if saved_source_file.parse_file_type == ParseFileType.Documentation:
|
||||
self.delete_doc_node(saved_source_file)
|
||||
|
||||
logger.debug(f"Partial parsing: deleted file: {file_id}")
|
||||
|
||||
# Updates for non-schema files
|
||||
def update_in_saved(self, file_id):
|
||||
new_source_file = self.new_files[file_id]
|
||||
old_source_file = self.saved_files[file_id]
|
||||
|
||||
if new_source_file.parse_file_type in mssat_files:
|
||||
self.update_mssat_in_saved(new_source_file, old_source_file)
|
||||
elif new_source_file.parse_file_type == ParseFileType.Macro:
|
||||
self.update_macro_in_saved(new_source_file, old_source_file)
|
||||
elif new_source_file.parse_file_type == ParseFileType.Documentation:
|
||||
self.update_doc_in_saved(new_source_file, old_source_file)
|
||||
else:
|
||||
raise Exception(f"Invalid parse_file_type in source_file {file_id}")
|
||||
logger.debug(f"Partial parsing: updated file: {file_id}")
|
||||
|
||||
# Models, seeds, snapshots: patches and tests
|
||||
# analyses: patches, no tests
|
||||
# tests: not touched by schema files (no patches, no tests)
|
||||
# Updated schema files should have been processed already.
|
||||
def update_mssat_in_saved(self, new_source_file, old_source_file):
|
||||
|
||||
# These files only have one node.
|
||||
unique_id = old_source_file.nodes[0]
|
||||
|
||||
# replace source_file in saved and add to parsing list
|
||||
file_id = new_source_file.file_id
|
||||
self.deleted_manifest.files[file_id] = old_source_file
|
||||
self.saved_files[file_id] = new_source_file
|
||||
self.add_to_pp_files(new_source_file)
|
||||
self.remove_node_in_saved(new_source_file, unique_id)
|
||||
|
||||
def remove_node_in_saved(self, source_file, unique_id):
|
||||
# delete node in saved
|
||||
node = self.saved_manifest.nodes.pop(unique_id)
|
||||
self.deleted_manifest.nodes[unique_id] = node
|
||||
|
||||
# look at patch_path in model node to see if we need
|
||||
# to reapply a patch from a schema_file.
|
||||
if node.patch_path:
|
||||
file_id = node.patch_path
|
||||
# it might be changed... then what?
|
||||
if file_id not in self.file_diff['deleted']:
|
||||
# schema_files should already be updated
|
||||
schema_file = self.saved_files[file_id]
|
||||
dict_key = parse_file_type_to_key[source_file.parse_file_type]
|
||||
# look for a matching list dictionary
|
||||
for elem in schema_file.dict_from_yaml[dict_key]:
|
||||
if elem['name'] == node.name:
|
||||
elem_patch = elem
|
||||
break
|
||||
if elem_patch:
|
||||
self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
|
||||
self.merge_patch(schema_file, dict_key, elem_patch)
|
||||
if unique_id in schema_file.node_patches:
|
||||
schema_file.node_patches.remove(unique_id)
|
||||
|
||||
def update_macro_in_saved(self, new_source_file, old_source_file):
|
||||
self.handle_macro_file_links(old_source_file)
|
||||
file_id = new_source_file.file_id
|
||||
self.saved_files[file_id] = new_source_file
|
||||
self.add_to_pp_files(new_source_file)
|
||||
|
||||
def update_doc_in_saved(self, new_source_file, old_source_file):
|
||||
self.delete_doc_node(old_source_file)
|
||||
self.saved_files[new_source_file.file_id] = new_source_file
|
||||
self.add_to_pp_files(new_source_file)
|
||||
|
||||
def remove_mssat_file(self, source_file):
|
||||
# nodes [unique_ids] -- SQL files
|
||||
# There should always be a node for a SQL file
|
||||
if not source_file.nodes:
|
||||
raise Exception(f"No nodes found for source file {source_file.file_id}")
|
||||
# There is generally only 1 node for SQL files, except for macros
|
||||
for unique_id in source_file.nodes:
|
||||
self.remove_node_in_saved(source_file, unique_id)
|
||||
self.schedule_referencing_nodes_for_parsing(unique_id)
|
||||
|
||||
# We need to re-parse nodes that reference another removed node
|
||||
def schedule_referencing_nodes_for_parsing(self, unique_id):
|
||||
# Look at "children", i.e. nodes that reference this node
|
||||
self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
|
||||
|
||||
def schedule_nodes_for_parsing(self, unique_ids):
|
||||
for unique_id in unique_ids:
|
||||
if unique_id in self.saved_manifest.nodes:
|
||||
node = self.saved_manifest.nodes[unique_id]
|
||||
if node.resource_type == NodeType.Test:
|
||||
# test nodes are handled separately. Must be removed from schema file
|
||||
continue
|
||||
file_id = node.file_id
|
||||
if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
|
||||
source_file = self.saved_files[file_id]
|
||||
self.remove_mssat_file(source_file)
|
||||
# content of non-schema files is only in new files
|
||||
self.saved_files[file_id] = self.new_files[file_id]
|
||||
self.add_to_pp_files(self.saved_files[file_id])
|
||||
elif unique_id in self.saved_manifest.sources:
|
||||
source = self.saved_manifest.sources[unique_id]
|
||||
file_id = source.file_id
|
||||
if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
|
||||
schema_file = self.saved_files[file_id]
|
||||
sources = []
|
||||
if 'sources' in schema_file.dict_from_yaml:
|
||||
sources = schema_file.dict_from_yaml['sources']
|
||||
source_element = self.get_schema_element(sources, source.source_name)
|
||||
if source_element:
|
||||
self.delete_schema_source(schema_file, source_element)
|
||||
self.remove_tests(schema_file, source_element['name'])
|
||||
self.merge_patch(schema_file, 'sources', source_element)
|
||||
elif unique_id in self.saved_manifest.exposures:
|
||||
exposure = self.saved_manifest.exposures[unique_id]
|
||||
file_id = exposure.file_id
|
||||
if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
|
||||
schema_file = self.saved_files[file_id]
|
||||
exposures = []
|
||||
if 'exposures' in schema_file.dict_from_yaml:
|
||||
exposures = schema_file.dict_from_yaml['exposures']
|
||||
exposure_element = self.get_schema_element(exposures, exposure.name)
|
||||
if exposure_element:
|
||||
self.delete_schema_exposure(schema_file, exposure_element)
|
||||
self.merge_patch(schema_file, 'exposures', exposure_element)
|
||||
elif unique_id in self.saved_manifest.macros:
|
||||
macro = self.saved_manifest.macros[unique_id]
|
||||
file_id = macro.file_id
|
||||
if file_id in self.saved_files and file_id not in self.file_diff['deleted']:
|
||||
source_file = self.saved_files[file_id]
|
||||
self.delete_macro_file(source_file)
|
||||
self.saved_files[file_id] = self.new_files[file_id]
|
||||
self.add_to_pp_files(self.saved_files[file_id])
|
||||
|
||||
def delete_macro_file(self, source_file):
|
||||
self.handle_macro_file_links(source_file)
|
||||
file_id = source_file.file_id
|
||||
self.deleted_manifest.files[file_id] = self.saved_files.pop(file_id)
|
||||
|
||||
def handle_macro_file_links(self, source_file):
|
||||
# remove the macros in the 'macros' dictionary
|
||||
for unique_id in source_file.macros:
|
||||
base_macro = self.saved_manifest.macros.pop(unique_id)
|
||||
self.deleted_manifest.macros[unique_id] = base_macro
|
||||
# loop through all macros, finding references to this macro: macro.depends_on.macros
|
||||
for macro in self.saved_manifest.macros.values():
|
||||
for macro_unique_id in macro.depends_on.macros:
|
||||
if (macro_unique_id == unique_id and
|
||||
macro_unique_id in self.saved_manifest.macros):
|
||||
# schedule file for parsing
|
||||
dep_file_id = macro.file_id
|
||||
if dep_file_id in self.saved_files:
|
||||
source_file = self.saved_files[dep_file_id]
|
||||
dep_macro = self.saved_manifest.macros.pop(macro.unique_id)
|
||||
self.deleted_manifest.macros[macro.unqiue_id] = dep_macro
|
||||
self.add_to_pp_files(source_file)
|
||||
break
|
||||
# loop through all nodes, finding references to this macro: node.depends_on.macros
|
||||
for node in self.saved_manifest.nodes.values():
|
||||
for macro_unique_id in node.depends_on.macros:
|
||||
if (macro_unique_id == unique_id and
|
||||
macro_unique_id in self.saved_manifest.macros):
|
||||
# schedule file for parsing
|
||||
dep_file_id = node.file_id
|
||||
if dep_file_id in self.saved_files:
|
||||
source_file = self.saved_files[dep_file_id]
|
||||
self.remove_node_in_saved(source_file, node.unique_id)
|
||||
self.add_to_pp_files(source_file)
|
||||
break
|
||||
if base_macro.patch_path:
|
||||
file_id = base_macro.patch_path
|
||||
if file_id in self.saved_files:
|
||||
schema_file = self.saved_files[file_id]
|
||||
macro_patches = []
|
||||
if 'macros' in schema_file.dict_from_yaml:
|
||||
macro_patches = schema_file.dict_from_yaml['macros']
|
||||
macro_patch = self.get_schema_element(macro_patches, base_macro.name)
|
||||
self.delete_schema_macro_patch(schema_file, macro_patch)
|
||||
self.merge_patch(schema_file, 'macros', macro_patch)
|
||||
|
||||
def delete_doc_node(self, source_file):
|
||||
# remove the nodes in the 'docs' dictionary
|
||||
docs = source_file.docs.copy()
|
||||
for unique_id in docs:
|
||||
self.deleted_manifest.docs[unique_id] = self.saved_manifest.docs.pop(unique_id)
|
||||
source_file.docs.remove(unique_id)
|
||||
# The unique_id of objects that contain a doc call are stored in the
|
||||
# doc source_file.nodes
|
||||
self.schedule_nodes_for_parsing(source_file.nodes)
|
||||
source_file.nodes = []
|
||||
|
||||
# Schema files -----------------------
|
||||
# Changed schema files
|
||||
def change_schema_file(self, file_id):
|
||||
saved_schema_file = self.saved_files[file_id]
|
||||
new_schema_file = self.new_files[file_id]
|
||||
saved_yaml_dict = saved_schema_file.dict_from_yaml
|
||||
new_yaml_dict = new_schema_file.dict_from_yaml
|
||||
saved_schema_file.pp_dict = {"version": saved_yaml_dict['version']}
|
||||
self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
|
||||
|
||||
# copy from new schema_file to saved_schema_file to preserve references
|
||||
# that weren't removed
|
||||
saved_schema_file.contents = new_schema_file.contents
|
||||
saved_schema_file.checksum = new_schema_file.checksum
|
||||
saved_schema_file.dfy = new_schema_file.dfy
|
||||
# schedule parsing
|
||||
self.add_to_pp_files(saved_schema_file)
|
||||
# schema_file pp_dict should have been generated already
|
||||
logger.debug(f"Partial parsing: update schema file: {file_id}")
|
||||
|
||||
# Delete schema files -- a variation on change_schema_file
|
||||
def delete_schema_file(self, file_id):
|
||||
saved_schema_file = self.saved_files[file_id]
|
||||
saved_yaml_dict = saved_schema_file.dict_from_yaml
|
||||
new_yaml_dict = {}
|
||||
self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
|
||||
self.deleted_manifest.files[file_id] = self.saved_manifest.files.pop(file_id)
|
||||
|
||||
# For each key in a schema file dictionary, process the changed, deleted, and added
|
||||
# elemnts for the key lists
|
||||
def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
|
||||
# loop through comparing previous dict_from_yaml with current dict_from_yaml
|
||||
# Need to do the deleted/added/changed thing, just like the files lists
|
||||
|
||||
# models, seeds, snapshots, analyses
|
||||
for dict_key in ['models', 'seeds', 'snapshots', 'analyses']:
|
||||
key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
|
||||
if key_diff['changed']:
|
||||
for elem in key_diff['changed']:
|
||||
self.delete_schema_mssa_links(schema_file, dict_key, elem)
|
||||
self.merge_patch(schema_file, dict_key, elem)
|
||||
if key_diff['deleted']:
|
||||
for elem in key_diff['deleted']:
|
||||
self.delete_schema_mssa_links(schema_file, dict_key, elem)
|
||||
if key_diff['added']:
|
||||
for elem in key_diff['added']:
|
||||
self.merge_patch(schema_file, dict_key, elem)
|
||||
|
||||
# sources
|
||||
source_diff = self.get_diff_for('sources', saved_yaml_dict, new_yaml_dict)
|
||||
if source_diff['changed']:
|
||||
for source in source_diff['changed']:
|
||||
if 'overrides' in source: # This is a source patch; need to re-parse orig source
|
||||
self.remove_source_override_target(source)
|
||||
self.delete_schema_source(schema_file, source)
|
||||
self.remove_tests(schema_file, source['name'])
|
||||
self.merge_patch(schema_file, 'sources', source)
|
||||
if source_diff['deleted']:
|
||||
for source in source_diff['deleted']:
|
||||
if 'overrides' in source: # This is a source patch; need to re-parse orig source
|
||||
self.remove_source_override_target(source)
|
||||
self.delete_schema_source(schema_file, source)
|
||||
self.remove_tests(schema_file, source['name'])
|
||||
if source_diff['added']:
|
||||
for source in source_diff['added']:
|
||||
if 'overrides' in source: # This is a source patch; need to re-parse orig source
|
||||
self.remove_source_override_target(source)
|
||||
self.merge_patch(schema_file, 'sources', source)
|
||||
|
||||
# macros
|
||||
macro_diff = self.get_diff_for('macros', saved_yaml_dict, new_yaml_dict)
|
||||
if macro_diff['changed']:
|
||||
for macro in macro_diff['changed']:
|
||||
self.delete_schema_macro_patch(schema_file, macro)
|
||||
self.merge_patch(schema_file, 'macros', macro)
|
||||
if macro_diff['deleted']:
|
||||
for macro in macro_diff['deleted']:
|
||||
self.delete_schema_macro_patch(schema_file, macro)
|
||||
if macro_diff['added']:
|
||||
for macro in macro_diff['added']:
|
||||
self.merge_patch(schema_file, 'macros', macro)
|
||||
|
||||
# exposures
|
||||
exposure_diff = self.get_diff_for('exposures', saved_yaml_dict, new_yaml_dict)
|
||||
if exposure_diff['changed']:
|
||||
for exposure in exposure_diff['changed']:
|
||||
self.delete_schema_exposure(schema_file, exposure)
|
||||
self.merge_patch(schema_file, 'exposures', exposure)
|
||||
if exposure_diff['deleted']:
|
||||
for exposure in exposure_diff['deleted']:
|
||||
self.delete_schema_exposure(schema_file, exposure)
|
||||
if exposure_diff['added']:
|
||||
for exposure in exposure_diff['added']:
|
||||
self.merge_patch(schema_file, 'exposures', exposure)
|
||||
|
||||
# Take a "section" of the schema file yaml dictionary from saved and new schema files
|
||||
# and determine which parts have changed
|
||||
def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
|
||||
if key in saved_yaml_dict or key in new_yaml_dict:
|
||||
saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
|
||||
new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
|
||||
else:
|
||||
return {'deleted': [], 'added': [], 'changed': []}
|
||||
# for each set of keys, need to create a dictionary of names pointing to entry
|
||||
saved_elements_by_name = {}
|
||||
new_elements_by_name = {}
|
||||
# sources have two part names?
|
||||
for element in saved_elements:
|
||||
saved_elements_by_name[element['name']] = element
|
||||
for element in new_elements:
|
||||
new_elements_by_name[element['name']] = element
|
||||
|
||||
# now determine which elements, by name, are added, deleted or changed
|
||||
saved_element_names = set(saved_elements_by_name.keys())
|
||||
new_element_names = set(new_elements_by_name.keys())
|
||||
deleted = saved_element_names.difference(new_element_names)
|
||||
added = new_element_names.difference(saved_element_names)
|
||||
common = saved_element_names.intersection(new_element_names)
|
||||
changed = []
|
||||
for element_name in common:
|
||||
if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
|
||||
changed.append(element_name)
|
||||
|
||||
# make lists of yaml elements to return as diffs
|
||||
deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
|
||||
added_elements = [new_elements_by_name[name].copy() for name in added]
|
||||
changed_elements = [new_elements_by_name[name].copy() for name in changed]
|
||||
|
||||
diff = {
|
||||
"deleted": deleted_elements,
|
||||
"added": added_elements,
|
||||
"changed": changed_elements,
|
||||
}
|
||||
return diff
|
||||
|
||||
# Merge a patch file into the pp_dict in a schema file
|
||||
def merge_patch(self, schema_file, key, patch):
|
||||
if not schema_file.pp_dict:
|
||||
schema_file.pp_dict = {"version": schema_file.dict_from_yaml['version']}
|
||||
pp_dict = schema_file.pp_dict
|
||||
if key not in pp_dict:
|
||||
pp_dict[key] = [patch]
|
||||
else:
|
||||
# check that this patch hasn't already been saved
|
||||
found = False
|
||||
for elem in pp_dict[key]:
|
||||
if elem['name'] == patch['name']:
|
||||
found = True
|
||||
if not found:
|
||||
pp_dict[key].append(patch)
|
||||
self.add_to_pp_files(schema_file)
|
||||
|
||||
# For model, seed, snapshot, analysis schema dictionary keys,
|
||||
# delete the patches and tests from the patch
|
||||
def delete_schema_mssa_links(self, schema_file, dict_key, elem):
|
||||
# find elem node unique_id in node_patches
|
||||
prefix = key_to_prefix[dict_key]
|
||||
elem_unique_id = ''
|
||||
for unique_id in schema_file.node_patches:
|
||||
if not unique_id.startswith(prefix):
|
||||
continue
|
||||
parts = unique_id.split('.')
|
||||
elem_name = parts[-1]
|
||||
if elem_name == elem['name']:
|
||||
elem_unique_id = unique_id
|
||||
break
|
||||
|
||||
# remove elem node and remove unique_id from node_patches
|
||||
if elem_unique_id:
|
||||
# might have been already removed
|
||||
if elem_unique_id in self.saved_manifest.nodes:
|
||||
node = self.saved_manifest.nodes.pop(elem_unique_id)
|
||||
self.deleted_manifest.nodes[elem_unique_id] = node
|
||||
# need to add the node source_file to pp_files
|
||||
file_id = node.file_id
|
||||
# need to copy new file to saved files in order to get content
|
||||
if file_id in self.new_files:
|
||||
self.saved_files[file_id] = self.new_files[file_id]
|
||||
if self.saved_files[file_id]:
|
||||
source_file = self.saved_files[file_id]
|
||||
self.add_to_pp_files(source_file)
|
||||
# remove from patches
|
||||
schema_file.node_patches.remove(elem_unique_id)
|
||||
|
||||
# for models, seeds, snapshots (not analyses)
|
||||
if dict_key in ['models', 'seeds', 'snapshots']:
|
||||
# find related tests and remove them
|
||||
self.remove_tests(schema_file, elem['name'])
|
||||
|
||||
def remove_tests(self, schema_file, name):
|
||||
tests = self.get_tests_for(schema_file, name)
|
||||
for test_unique_id in tests:
|
||||
node = self.saved_manifest.nodes.pop(test_unique_id)
|
||||
self.deleted_manifest.nodes[test_unique_id] = node
|
||||
schema_file.tests.remove(test_unique_id)
|
||||
|
||||
# Create a pp_test_index in the schema file if it doesn't exist
|
||||
# and look for test names related to this yaml dict element name
|
||||
def get_tests_for(self, schema_file, name):
|
||||
if not schema_file.pp_test_index:
|
||||
pp_test_index = {}
|
||||
for test_unique_id in schema_file.tests:
|
||||
test_node = self.saved_manifest.nodes[test_unique_id]
|
||||
if test_node.sources:
|
||||
for source_ref in test_node.sources:
|
||||
source_name = source_ref[0]
|
||||
if source_name in pp_test_index:
|
||||
pp_test_index[source_name].append(test_unique_id)
|
||||
else:
|
||||
pp_test_index[source_name] = [test_unique_id]
|
||||
elif test_node.depends_on.nodes:
|
||||
tested_node_id = test_node.depends_on.nodes[0]
|
||||
parts = tested_node_id.split('.')
|
||||
elem_name = parts[-1]
|
||||
if elem_name in pp_test_index:
|
||||
pp_test_index[elem_name].append(test_unique_id)
|
||||
else:
|
||||
                        pp_test_index[elem_name] = [test_unique_id]
            elif (hasattr(test_node, 'test_metadata') and
                    'model' in test_node.test_metadata.kwargs):
                (_, elem_name, _) = test_node.test_metadata.kwargs['model'].split("'")
                if elem_name:
                    if elem_name in pp_test_index:
                        pp_test_index[elem_name].append(test_unique_id)
                    else:
                        pp_test_index[elem_name] = [test_unique_id]
        schema_file.pp_test_index = pp_test_index
        if name in schema_file.pp_test_index:
            return schema_file.pp_test_index[name]
        return []

    def delete_schema_source(self, schema_file, source_dict):
        # both patches, tests, and source nodes
        source_name = source_dict['name']
        # There may be multiple sources for each source dict, since
        # there will be a separate source node for each table.
        # ParsedSourceDefinition name = table name, dict name is source_name
        sources = schema_file.sources.copy()
        for unique_id in sources:
            if unique_id in self.saved_manifest.sources:
                source = self.saved_manifest.sources[unique_id]
                if source.source_name == source_name:
                    source = self.saved_manifest.sources.pop(unique_id)
                    self.deleted_manifest.sources[unique_id] = source
                    schema_file.sources.remove(unique_id)
                    self.schedule_referencing_nodes_for_parsing(unique_id)
                    logger.debug(f"Partial parsing: deleted source {unique_id}")

    def delete_schema_macro_patch(self, schema_file, macro):
        # This is just macro patches that need to be reapplied
        for unique_id in schema_file.macro_patches:
            parts = unique_id.split('.')
            macro_name = parts[-1]
            if macro_name == macro['name']:
                macro_unique_id = unique_id
                break
        if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
            macro = self.saved_manifest.macros.pop(macro_unique_id)
            self.deleted_manifest.macros[macro_unique_id] = macro
            macro_file_id = macro.file_id
            self.add_to_pp_files(self.saved_files[macro_file_id])
        if macro_unique_id in schema_file.macro_patches:
            schema_file.macro_patches.remove(macro_unique_id)

    # exposures are created only from schema files, so just delete
    # the exposure.
    def delete_schema_exposure(self, schema_file, exposure_dict):
        exposure_name = exposure_dict['name']
        exposures = schema_file.exposures.copy()
        for unique_id in exposures:
            exposure = self.saved_manifest.exposures[unique_id]
            if unique_id in self.saved_manifest.exposures:
                if exposure.name == exposure_name:
                    self.deleted_manifest.exposures[unique_id] = \
                        self.saved_manifest.exposures.pop(unique_id)
                    schema_file.exposures.remove(unique_id)
                    logger.debug(f"Partial parsing: deleted exposure {unique_id}")

    def get_schema_element(self, elem_list, elem_name):
        for element in elem_list:
            if 'name' in element and element['name'] == elem_name:
                return element
        return None

    def get_schema_file_for_source(self, package_name, source_name):
        schema_file = None
        for source in self.saved_manifest.sources.values():
            if source.package_name == package_name and source.source_name == source_name:
                file_id = source.file_id
                if file_id in self.saved_files:
                    schema_file = self.saved_files[file_id]
                break
        return schema_file

    def get_source_override_file_and_dict(self, source):
        package = source['overrides']
        source_name = source['name']
        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
        orig_sources = orig_source_schema_file.dict_from_yaml['sources']
        orig_source = self.get_schema_element(orig_sources, source_name)
        return (orig_source_schema_file, orig_source)

    def remove_source_override_target(self, source_dict):
        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
        if orig_source:
            self.delete_schema_source(orig_file, orig_source)
            self.remove_tests(orig_file, orig_source['name'])
            self.merge_patch(orig_file, 'sources', orig_source)
            self.add_to_pp_files(orig_file)
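The partial-parsing methods above rely on a per-file test index (schema_file.pp_test_index) so a change to one schema element only reschedules the tests attached to it. The following is a minimal, self-contained sketch of that index idea; build_test_index and the 'attached_to' key are illustrative names, not dbt's API.

# Illustrative only: a standalone sketch of the "test index" idea used by the
# partial-parsing code above. Names here are hypothetical, not dbt internals.
from collections import defaultdict
from typing import Dict, List


def build_test_index(tests: Dict[str, dict]) -> Dict[str, List[str]]:
    # Map each tested element name (model/source/seed name) to the unique_ids
    # of the tests attached to it, so that when that element's schema entry
    # changes, exactly those tests can be scheduled for re-parsing.
    index: Dict[str, List[str]] = defaultdict(list)
    for unique_id, test_node in tests.items():
        elem_name = test_node.get('attached_to')
        if elem_name:
            index[elem_name].append(unique_id)
    return dict(index)


if __name__ == '__main__':
    tests = {
        'test.my_project.unique_customers_id': {'attached_to': 'customers'},
        'test.my_project.not_null_customers_id': {'attached_to': 'customers'},
        'test.my_project.unique_orders_id': {'attached_to': 'orders'},
    }
    index = build_test_index(tests)
    # Tests to reschedule when the 'customers' schema entry changes:
    print(index.get('customers', []))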
@@ -1,18 +1,24 @@
|
||||
from dbt.clients.system import load_file_contents
|
||||
from dbt.contracts.files import FilePath, ParseFileType, SourceFile, FileHash
|
||||
from dbt.contracts.files import (
|
||||
FilePath, ParseFileType, SourceFile, FileHash, AnySourceFile, SchemaSourceFile
|
||||
)
|
||||
|
||||
from dbt.parser.schemas import yaml_from_file
|
||||
from dbt.parser.search import FilesystemSearcher
|
||||
|
||||
|
||||
# This loads the files contents and creates the SourceFile object
|
||||
def load_source_file(
|
||||
path: FilePath, parse_file_type: ParseFileType,
|
||||
project_name: str) -> SourceFile:
|
||||
project_name: str) -> AnySourceFile:
|
||||
file_contents = load_file_contents(path.absolute_path, strip=False)
|
||||
checksum = FileHash.from_contents(file_contents)
|
||||
source_file = SourceFile(path=path, checksum=checksum,
|
||||
parse_file_type=parse_file_type, project_name=project_name)
|
||||
sf_cls = SchemaSourceFile if parse_file_type == ParseFileType.Schema else SourceFile
|
||||
source_file = sf_cls(path=path, checksum=checksum,
|
||||
parse_file_type=parse_file_type, project_name=project_name)
|
||||
source_file.contents = file_contents.strip()
|
||||
if parse_file_type == ParseFileType.Schema:
|
||||
source_file.dfy = yaml_from_file(source_file)
|
||||
return source_file
|
||||
|
||||
|
||||
@@ -55,8 +61,8 @@ def read_files_for_parser(project, files, dirs, extension, parse_ft):
|
||||
project, dirs, extension, parse_ft
|
||||
)
|
||||
for sf in source_files:
|
||||
files[sf.search_key] = sf
|
||||
parser_files.append(sf.search_key)
|
||||
files[sf.file_id] = sf
|
||||
parser_files.append(sf.file_id)
|
||||
return parser_files
|
||||
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ class RPCCallParser(SimpleSQLParser[ParsedRPCNode]):
|
||||
return os.path.join('rpc', block.name)
|
||||
|
||||
def parse_remote(self, sql: str, name: str) -> ParsedRPCNode:
|
||||
source_file = SourceFile.remote(contents=sql)
|
||||
source_file = SourceFile.remote(sql, self.project.project_name)
|
||||
contents = RPCBlock(rpc_name=name, file=source_file)
|
||||
return self.parse_node(contents)
|
||||
|
||||
|
||||
@@ -41,16 +41,21 @@ def get_nice_schema_test_name(
    clean_flat_args = [re.sub('[^0-9a-zA-Z_]+', '_', arg) for arg in flat_args]
    unique = "__".join(clean_flat_args)

    cutoff = 32
    if len(unique) <= cutoff:
        label = unique
    # for the file path + alias, the name must be <64 characters
    # if the full name is too long, include the first 30 identifying chars plus
    # a 32-character hash of the full contents

    test_identifier = '{}_{}'.format(test_type, test_name)
    full_name = '{}_{}'.format(test_identifier, unique)

    if len(full_name) >= 64:
        test_trunc_identifier = test_identifier[:30]
        label = hashlib.md5(full_name.encode('utf-8')).hexdigest()
        short_name = '{}_{}'.format(test_trunc_identifier, label)
    else:
        label = hashlib.md5(unique.encode('utf-8')).hexdigest()
        short_name = full_name

    filename = '{}_{}_{}'.format(test_type, test_name, label)
    name = '{}_{}_{}'.format(test_type, test_name, unique)

    return filename, name
    return short_name, full_name
|
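The hunk above changes schema test naming: when the combined identifier reaches 64 characters it keeps the first 30 characters of the test identifier and appends an md5 hash of the full name. A small standalone sketch of that rule, not dbt's actual helper (the real function also returns the long human-readable name):

# Illustrative only: the 64-character cutoff and md5 fallback described above.
import hashlib
import re


def nice_test_name(test_type: str, test_name: str, args: list) -> str:
    clean_args = [re.sub('[^0-9a-zA-Z_]+', '_', str(arg)) for arg in args]
    unique = "__".join(clean_args)
    test_identifier = '{}_{}'.format(test_type, test_name)
    full_name = '{}_{}'.format(test_identifier, unique)
    if len(full_name) >= 64:
        # too long for a file path / alias: truncate the identifier and hash the rest
        label = hashlib.md5(full_name.encode('utf-8')).hexdigest()
        return '{}_{}'.format(test_identifier[:30], label)
    return full_name


print(nice_test_name('schema_test', 'unique', ['orders', 'id']))
print(nice_test_name('schema_test', 'relationships',
                     ['a_very_long_model_name', 'another_really_long_column_name', 'ref_model']))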
||||
|
||||
@dataclass
|
||||
@@ -185,7 +190,10 @@ class TestBuilder(Generic[Testable]):
|
||||
r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
|
||||
)
|
||||
# kwargs representing test configs
|
||||
MODIFIER_ARGS = ('severity', 'tags', 'enabled')
|
||||
MODIFIER_ARGS = (
|
||||
'severity', 'tags', 'enabled', 'where', 'limit', 'warn_if', 'error_if',
|
||||
'fail_calc', 'store_failures'
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -231,6 +239,10 @@ class TestBuilder(Generic[Testable]):
|
||||
self.compiled_name: str = compiled_name
|
||||
self.fqn_name: str = fqn_name
|
||||
|
||||
# use hashed name as alias if too long
|
||||
if compiled_name != fqn_name:
|
||||
self.modifiers['alias'] = compiled_name
|
||||
|
||||
def _bad_type(self) -> TypeError:
|
||||
return TypeError('invalid target type "{}"'.format(type(self.target)))
|
||||
|
||||
@@ -268,9 +280,15 @@ class TestBuilder(Generic[Testable]):
|
||||
test_args['column_name'] = name
|
||||
return test_name, test_args
|
||||
|
||||
@property
|
||||
def enabled(self) -> Optional[bool]:
|
||||
return self.modifiers.get('enabled')
|
||||
|
||||
@property
|
||||
def alias(self) -> Optional[str]:
|
||||
return self.modifiers.get('alias')
|
||||
|
||||
@property
|
||||
def severity(self) -> Optional[str]:
|
||||
sev = self.modifiers.get('severity')
|
||||
if sev:
|
||||
@@ -278,6 +296,30 @@ class TestBuilder(Generic[Testable]):
|
||||
else:
|
||||
return None
|
||||
|
||||
@property
|
||||
def store_failures(self) -> Optional[bool]:
|
||||
return self.modifiers.get('store_failures')
|
||||
|
||||
@property
|
||||
def where(self) -> Optional[str]:
|
||||
return self.modifiers.get('where')
|
||||
|
||||
@property
|
||||
def limit(self) -> Optional[int]:
|
||||
return self.modifiers.get('limit')
|
||||
|
||||
@property
|
||||
def warn_if(self) -> Optional[str]:
|
||||
return self.modifiers.get('warn_if')
|
||||
|
||||
@property
|
||||
def error_if(self) -> Optional[str]:
|
||||
return self.modifiers.get('error_if')
|
||||
|
||||
@property
|
||||
def fail_calc(self) -> Optional[str]:
|
||||
return self.modifiers.get('fail_calc')
|
||||
|
||||
def tags(self) -> List[str]:
|
||||
tags = self.modifiers.get('tags', [])
|
||||
if isinstance(tags, str):
|
||||
@@ -313,7 +355,8 @@ class TestBuilder(Generic[Testable]):
|
||||
|
||||
def construct_config(self) -> str:
|
||||
configs = ",".join([
|
||||
f"{key}=" + (f"'{value}'" if isinstance(value, str) else str(value))
|
||||
f"{key}=" + (f"'{value}'" if isinstance(value, str)
|
||||
else str(value))
|
||||
for key, value
|
||||
in self.modifiers.items()
|
||||
])
|
||||
@@ -326,7 +369,7 @@ class TestBuilder(Generic[Testable]):
|
||||
# of the test macro
|
||||
def build_raw_sql(self) -> str:
|
||||
return (
|
||||
"{config}{{{{ {macro}(**{kwargs_name}) }}}}"
|
||||
"{{{{ {macro}(**{kwargs_name}) }}}}{config}"
|
||||
).format(
|
||||
macro=self.macro_name(),
|
||||
config=self.construct_config(),
|
||||
@@ -334,10 +377,13 @@ class TestBuilder(Generic[Testable]):
        )

    def build_model_str(self):
        targ = self.target
        cfg_where = "config.get('where')"
        if isinstance(self.target, UnparsedNodeUpdate):
            fmt = "{{{{ ref('{0.name}') }}}}"
            identifier = self.target.name
            target_str = f"{{{{ ref('{targ.name}') }}}}"
        elif isinstance(self.target, UnpatchedSourceDefinition):
            fmt = "{{{{ source('{0.source.name}', '{0.table.name}') }}}}"
        else:
            raise self._bad_type()
        return fmt.format(self.target)
            identifier = self.target.table.name
            target_str = f"{{{{ source('{targ.source.name}', '{targ.table.name}') }}}}"
        filtered = f"(select * from {target_str} where {{{{{cfg_where}}}}}) {identifier}"
        return f"{{% if {cfg_where} %}}{filtered}{{% else %}}{target_str}{{% endif %}}"
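The build_model_str change above wraps the ref or source in a Jinja conditional so a configured where clause filters the tested relation. A hedged sketch of the string such a builder produces for a hypothetical model named 'orders' (the Jinja is evaluated later, when the test's raw SQL is rendered):

# Illustrative only: reproduces the string construction shown above for a ref target.
def build_model_str(name: str) -> str:
    cfg_where = "config.get('where')"
    target_str = f"{{{{ ref('{name}') }}}}"
    filtered = f"(select * from {target_str} where {{{{{cfg_where}}}}}) {name}"
    return f"{{% if {cfg_where} %}}{filtered}{{% else %}}{target_str}{{% endif %}}"


print(build_model_str('orders'))
# With a where config such as "status != 'void'", the rendered result is roughly
#   (select * from <orders relation> where status != 'void') orders
# and with no where config it falls back to the plain ref() relation.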
@@ -2,6 +2,7 @@ import itertools
|
||||
import os
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from hashlib import md5
|
||||
from typing import (
|
||||
Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
|
||||
)
|
||||
@@ -13,10 +14,7 @@ from dbt.clients.jinja import get_rendered, add_rendered_test_kwargs
|
||||
from dbt.clients.yaml_helper import load_yaml_text
|
||||
from dbt.config.renderer import SchemaYamlRenderer
|
||||
from dbt.context.context_config import (
|
||||
BaseContextConfigGenerator,
|
||||
ContextConfig,
|
||||
ContextConfigGenerator,
|
||||
UnrenderedConfigGenerator,
|
||||
)
|
||||
from dbt.context.configured import generate_schema_yml
|
||||
from dbt.context.target import generate_target_context
|
||||
@@ -25,11 +23,9 @@ from dbt.context.providers import (
|
||||
)
|
||||
from dbt.context.macro_resolver import MacroResolver
|
||||
from dbt.contracts.files import FileHash
|
||||
from dbt.contracts.graph.manifest import SourceFile
|
||||
from dbt.contracts.graph.model_config import SourceConfig
|
||||
from dbt.contracts.graph.manifest import SchemaSourceFile
|
||||
from dbt.contracts.graph.parsed import (
|
||||
ParsedNodePatch,
|
||||
ParsedSourceDefinition,
|
||||
ColumnInfo,
|
||||
ParsedSchemaTestNode,
|
||||
ParsedMacroPatch,
|
||||
@@ -37,7 +33,6 @@ from dbt.contracts.graph.parsed import (
|
||||
ParsedExposure,
|
||||
)
|
||||
from dbt.contracts.graph.unparsed import (
|
||||
FreshnessThreshold,
|
||||
HasColumnDocs,
|
||||
HasColumnTests,
|
||||
HasDocs,
|
||||
@@ -52,7 +47,7 @@ from dbt.contracts.graph.unparsed import (
|
||||
from dbt.exceptions import (
|
||||
validator_error_message, JSONValidationException,
|
||||
raise_invalid_schema_yml_version, ValidationException,
|
||||
CompilationException, InternalException
|
||||
CompilationException,
|
||||
)
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SimpleParser
|
||||
@@ -96,6 +91,23 @@ def error_context(
|
||||
)
|
||||
|
||||
|
||||
def yaml_from_file(
|
||||
source_file: SchemaSourceFile
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""If loading the yaml fails, raise an exception.
|
||||
"""
|
||||
path: str = source_file.path.relative_path
|
||||
try:
|
||||
return load_yaml_text(source_file.contents)
|
||||
except ValidationException as e:
|
||||
reason = validator_error_message(e)
|
||||
raise CompilationException(
|
||||
'Error reading {}: {} - {}'
|
||||
.format(source_file.project_name, path, reason)
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
class ParserRef:
|
||||
"""A helper object to hold parse-time references."""
|
||||
|
||||
@@ -145,17 +157,6 @@ def _trimmed(inp: str) -> str:
|
||||
return inp[:44] + '...' + inp[-3:]
|
||||
|
||||
|
||||
def merge_freshness(
|
||||
base: Optional[FreshnessThreshold], update: Optional[FreshnessThreshold]
|
||||
) -> Optional[FreshnessThreshold]:
|
||||
if base is not None and update is not None:
|
||||
return base.merged(update)
|
||||
elif base is None and update is not None:
|
||||
return update
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
def __init__(
|
||||
self, project, manifest, root_project,
|
||||
@@ -218,22 +219,6 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
path, 'version {} is not supported'.format(version)
|
||||
)
|
||||
|
||||
def _yaml_from_file(
|
||||
self, source_file: SourceFile
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""If loading the yaml fails, raise an exception.
|
||||
"""
|
||||
path: str = source_file.path.relative_path
|
||||
try:
|
||||
return load_yaml_text(source_file.contents)
|
||||
except ValidationException as e:
|
||||
reason = validator_error_message(e)
|
||||
raise CompilationException(
|
||||
'Error reading {}: {} - {}'
|
||||
.format(self.project.project_name, path, reason)
|
||||
)
|
||||
return None
|
||||
|
||||
def parse_column_tests(
|
||||
self, block: TestBlock, column: UnparsedColumn
|
||||
) -> None:
|
||||
@@ -243,101 +228,6 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
for test in column.tests:
|
||||
self.parse_test(block, test, column)
|
||||
|
||||
def _generate_source_config(self, fqn: List[str], rendered: bool):
|
||||
generator: BaseContextConfigGenerator
|
||||
if rendered:
|
||||
generator = ContextConfigGenerator(self.root_project)
|
||||
else:
|
||||
generator = UnrenderedConfigGenerator(
|
||||
self.root_project
|
||||
)
|
||||
|
||||
return generator.calculate_node_config(
|
||||
config_calls=[],
|
||||
fqn=fqn,
|
||||
resource_type=NodeType.Source,
|
||||
project_name=self.project.project_name,
|
||||
base=False,
|
||||
)
|
||||
|
||||
def _get_relation_name(self, node: ParsedSourceDefinition):
|
||||
adapter = get_adapter(self.root_project)
|
||||
relation_cls = adapter.Relation
|
||||
return str(relation_cls.create_from(self.root_project, node))
|
||||
|
||||
# This converts an UnpatchedSourceDefinition to a ParsedSourceDefinition
|
||||
# it is used by the SourcePatcher.
|
||||
def parse_source(
|
||||
self, target: UnpatchedSourceDefinition
|
||||
) -> ParsedSourceDefinition:
|
||||
source = target.source
|
||||
table = target.table
|
||||
refs = ParserRef.from_target(table)
|
||||
unique_id = target.unique_id
|
||||
description = table.description or ''
|
||||
meta = table.meta or {}
|
||||
source_description = source.description or ''
|
||||
loaded_at_field = table.loaded_at_field or source.loaded_at_field
|
||||
|
||||
freshness = merge_freshness(source.freshness, table.freshness)
|
||||
quoting = source.quoting.merged(table.quoting)
|
||||
# path = block.path.original_file_path
|
||||
source_meta = source.meta or {}
|
||||
|
||||
# make sure we don't do duplicate tags from source + table
|
||||
tags = sorted(set(itertools.chain(source.tags, table.tags)))
|
||||
|
||||
config = self._generate_source_config(
|
||||
fqn=target.fqn,
|
||||
rendered=True,
|
||||
)
|
||||
|
||||
unrendered_config = self._generate_source_config(
|
||||
fqn=target.fqn,
|
||||
rendered=False,
|
||||
)
|
||||
|
||||
if not isinstance(config, SourceConfig):
|
||||
raise InternalException(
|
||||
f'Calculated a {type(config)} for a source, but expected '
|
||||
f'a SourceConfig'
|
||||
)
|
||||
|
||||
default_database = self.root_project.credentials.database
|
||||
|
||||
parsed_source = ParsedSourceDefinition(
|
||||
package_name=target.package_name,
|
||||
database=(source.database or default_database),
|
||||
schema=(source.schema or source.name),
|
||||
identifier=(table.identifier or table.name),
|
||||
root_path=target.root_path,
|
||||
path=target.path,
|
||||
original_file_path=target.original_file_path,
|
||||
columns=refs.column_info,
|
||||
unique_id=unique_id,
|
||||
name=table.name,
|
||||
description=description,
|
||||
external=table.external,
|
||||
source_name=source.name,
|
||||
source_description=source_description,
|
||||
source_meta=source_meta,
|
||||
meta=meta,
|
||||
loader=source.loader,
|
||||
loaded_at_field=loaded_at_field,
|
||||
freshness=freshness,
|
||||
quoting=quoting,
|
||||
resource_type=NodeType.Source,
|
||||
fqn=target.fqn,
|
||||
tags=tags,
|
||||
config=config,
|
||||
unrendered_config=unrendered_config,
|
||||
)
|
||||
|
||||
# relation name is added after instantiation because the adapter does
|
||||
# not provide the relation name for a UnpatchedSourceDefinition object
|
||||
parsed_source.relation_name = self._get_relation_name(parsed_source)
|
||||
return parsed_source
|
||||
|
||||
def create_test_node(
|
||||
self,
|
||||
target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate],
|
||||
@@ -351,6 +241,25 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
column_name: Optional[str],
|
||||
) -> ParsedSchemaTestNode:
|
||||
|
||||
HASH_LENGTH = 10
|
||||
|
||||
# N.B: This function builds a hashable string from any given test_metadata dict.
|
||||
# it's a bit fragile for general use (only supports str, int, float, List, Dict)
|
||||
# but it gets the job done here without the overhead of complete ser(de).
|
||||
def get_hashable_md(
|
||||
data: Union[str, int, float, List, Dict]
|
||||
) -> Union[str, List, Dict]:
|
||||
if type(data) == dict:
|
||||
return {k: get_hashable_md(data[k]) for k in sorted(data.keys())} # type: ignore
|
||||
elif type(data) == list:
|
||||
return [get_hashable_md(val) for val in data] # type: ignore
|
||||
else:
|
||||
return str(data)
|
||||
|
||||
hashable_metadata = repr(get_hashable_md(test_metadata))
|
||||
hash_string = ''.join([name, hashable_metadata]).encode('utf-8')
|
||||
test_hash = md5(hash_string).hexdigest()[-HASH_LENGTH:]
|
||||
|
||||
dct = {
|
||||
'alias': name,
|
||||
'schema': self.default_schema,
|
||||
@@ -364,7 +273,7 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
'original_file_path': target.original_file_path,
|
||||
'package_name': self.project.project_name,
|
||||
'raw_sql': raw_sql,
|
||||
'unique_id': self.generate_unique_id(name),
|
||||
'unique_id': self.generate_unique_id(name, test_hash),
|
||||
'config': self.config_dict(config),
|
||||
'test_metadata': test_metadata,
|
||||
'column_name': column_name,
|
||||
@@ -467,11 +376,33 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
if (macro_unique_id in
|
||||
['macro.dbt.test_not_null', 'macro.dbt.test_unique']):
|
||||
self.update_parsed_node(node, config)
|
||||
if builder.severity() is not None:
|
||||
node.unrendered_config['severity'] = builder.severity()
|
||||
node.config['severity'] = builder.severity()
|
||||
if builder.enabled() is not None:
|
||||
node.config['enabled'] = builder.enabled()
|
||||
# manually set configs
|
||||
# note: this does not respect generate_alias_name() macro
|
||||
if builder.alias is not None:
|
||||
node.unrendered_config['alias'] = builder.alias
|
||||
node.config['alias'] = builder.alias
|
||||
node.alias = builder.alias
|
||||
if builder.severity is not None:
|
||||
node.unrendered_config['severity'] = builder.severity
|
||||
node.config['severity'] = builder.severity
|
||||
if builder.enabled is not None:
|
||||
node.unrendered_config['enabled'] = builder.enabled
|
||||
node.config['enabled'] = builder.enabled
|
||||
if builder.where is not None:
|
||||
node.unrendered_config['where'] = builder.where
|
||||
node.config['where'] = builder.where
|
||||
if builder.limit is not None:
|
||||
node.unrendered_config['limit'] = builder.limit
|
||||
node.config['limit'] = builder.limit
|
||||
if builder.warn_if is not None:
|
||||
node.unrendered_config['warn_if'] = builder.warn_if
|
||||
node.config['warn_if'] = builder.warn_if
|
||||
if builder.error_if is not None:
|
||||
node.unrendered_config['error_if'] = builder.error_if
|
||||
node.config['error_if'] = builder.error_if
|
||||
if builder.fail_calc is not None:
|
||||
node.unrendered_config['fail_calc'] = builder.fail_calc
|
||||
node.config['fail_calc'] = builder.fail_calc
|
||||
# source node tests are processed at patch_source time
|
||||
if isinstance(builder.target, UnpatchedSourceDefinition):
|
||||
sources = [builder.target.fqn[-2], builder.target.fqn[-1]]
|
||||
@@ -497,42 +428,6 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
msg = validator_error_message(exc)
|
||||
raise CompilationException(msg, node=node) from exc
|
||||
|
||||
def parse_source_test(
|
||||
self,
|
||||
target: UnpatchedSourceDefinition,
|
||||
test: Dict[str, Any],
|
||||
column: Optional[UnparsedColumn],
|
||||
) -> ParsedSchemaTestNode:
|
||||
column_name: Optional[str]
|
||||
if column is None:
|
||||
column_name = None
|
||||
else:
|
||||
column_name = column.name
|
||||
should_quote = (
|
||||
column.quote or
|
||||
(column.quote is None and target.quote_columns)
|
||||
)
|
||||
if should_quote:
|
||||
column_name = get_adapter(self.root_project).quote(column_name)
|
||||
|
||||
tags_sources = [target.source.tags, target.table.tags]
|
||||
if column is not None:
|
||||
tags_sources.append(column.tags)
|
||||
tags = list(itertools.chain.from_iterable(tags_sources))
|
||||
|
||||
node = self._parse_generic_test(
|
||||
target=target,
|
||||
test=test,
|
||||
tags=tags,
|
||||
column_name=column_name
|
||||
)
|
||||
# we can't go through result.add_node - no file... instead!
|
||||
if node.config.enabled:
|
||||
self.manifest.add_node_nofile(node)
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(node)
|
||||
return node
|
||||
|
||||
def parse_node(self, block: SchemaTestBlock) -> ParsedSchemaTestNode:
|
||||
"""In schema parsing, we rewrite most of the part of parse_node that
|
||||
builds the initial node to be parsed, but rendering is basically the
|
||||
@@ -601,20 +496,14 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
for test in block.tests:
|
||||
self.parse_test(block, test, None)
|
||||
|
||||
def parse_exposures(self, block: YamlBlock) -> None:
|
||||
parser = ExposureParser(self, block)
|
||||
for node in parser.parse():
|
||||
self.manifest.add_exposure(block.file, node)
|
||||
|
||||
def parse_file(self, block: FileBlock) -> None:
|
||||
dct = self._yaml_from_file(block.file)
|
||||
|
||||
# mark the file as seen, in Manifest.files
|
||||
self.manifest.get_file(block.file)
|
||||
def parse_file(self, block: FileBlock, dct: Dict = None) -> None:
|
||||
assert isinstance(block.file, SchemaSourceFile)
|
||||
if not dct:
|
||||
dct = yaml_from_file(block.file)
|
||||
|
||||
if dct:
|
||||
try:
|
||||
# This does a deep_map to check for circular references
|
||||
# This does a deep_map which will fail if there are circular references
|
||||
dct = self.raw_renderer.render_data(dct)
|
||||
except CompilationException as exc:
|
||||
raise CompilationException(
|
||||
@@ -659,21 +548,21 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
parser = SourceParser(self, yaml_block, 'sources')
|
||||
parser.parse()
|
||||
|
||||
# NonSourceParser.parse()
|
||||
# NonSourceParser.parse() (but never test_blocks)
|
||||
if 'macros' in dct:
|
||||
parser = MacroPatchParser(self, yaml_block, 'macros')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
parser.parse()
|
||||
|
||||
# NonSourceParser.parse()
|
||||
# NonSourceParser.parse() (but never test_blocks)
|
||||
if 'analyses' in dct:
|
||||
parser = AnalysisPatchParser(self, yaml_block, 'analyses')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
parser.parse()
|
||||
|
||||
# parse exposures
|
||||
if 'exposures' in dct:
|
||||
self.parse_exposures(yaml_block)
|
||||
exp_parser = ExposureParser(self, yaml_block)
|
||||
for node in exp_parser.parse():
|
||||
self.manifest.add_exposure(yaml_block.file, node)
|
||||
|
||||
|
||||
Parsed = TypeVar(
|
||||
@@ -797,7 +686,7 @@ class SourceParser(YamlDocsReader):
|
||||
fqn = self.schema_parser.get_fqn_prefix(fqn_path)
|
||||
fqn.extend([source.name, table.name])
|
||||
|
||||
result = UnpatchedSourceDefinition(
|
||||
source_def = UnpatchedSourceDefinition(
|
||||
source=source,
|
||||
table=table,
|
||||
path=original_file_path,
|
||||
@@ -808,7 +697,7 @@ class SourceParser(YamlDocsReader):
|
||||
resource_type=NodeType.Source,
|
||||
fqn=fqn,
|
||||
)
|
||||
self.manifest.add_source(self.yaml.file, result)
|
||||
self.manifest.add_source(self.yaml.file, source_def)
|
||||
|
||||
|
||||
# This class has three main subclasses: TestablePatchParser (models,
|
||||
@@ -854,6 +743,7 @@ class NonSourceParser(YamlDocsReader, Generic[NonSourceTarget, Parsed]):
|
||||
# This adds the node_block to self.manifest
|
||||
# as a ParsedNodePatch or ParsedMacroPatch
|
||||
self.parse_patch(node_block, refs)
|
||||
# This will always be empty if the node a macro or analysis
|
||||
return test_blocks
|
||||
|
||||
def get_unparsed_target(self) -> Iterable[NonSourceTarget]:
|
||||
@@ -965,6 +855,8 @@ class ExposureParser(YamlReader):
|
||||
name=unparsed.name,
|
||||
type=unparsed.type,
|
||||
url=unparsed.url,
|
||||
meta=unparsed.meta,
|
||||
tags=unparsed.tags,
|
||||
description=unparsed.description,
|
||||
owner=unparsed.owner,
|
||||
maturity=unparsed.maturity,
|
||||
|
||||
@@ -7,7 +7,7 @@ from typing import (
|
||||
from dbt.clients.jinja import extract_toplevel_blocks, BlockTag
|
||||
from dbt.clients.system import find_matching
|
||||
from dbt.config import Project
|
||||
from dbt.contracts.files import SourceFile, FilePath
|
||||
from dbt.contracts.files import FilePath, AnySourceFile
|
||||
from dbt.exceptions import CompilationException, InternalException
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ from dbt.exceptions import CompilationException, InternalException
|
||||
# Could it be removed?
|
||||
@dataclass
|
||||
class FileBlock:
|
||||
file: SourceFile
|
||||
file: AnySourceFile
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
@@ -37,7 +37,7 @@ class FileBlock:
|
||||
# difference is what 'contents' returns?
|
||||
@dataclass
|
||||
class BlockContents(FileBlock):
|
||||
file: SourceFile # if you remove this, mypy will get upset
|
||||
file: AnySourceFile # if you remove this, mypy will get upset
|
||||
block: BlockTag
|
||||
|
||||
@property
|
||||
@@ -51,7 +51,7 @@ class BlockContents(FileBlock):
|
||||
|
||||
@dataclass
|
||||
class FullBlock(FileBlock):
|
||||
file: SourceFile # if you remove this, mypy will get upset
|
||||
file: AnySourceFile # if you remove this, mypy will get upset
|
||||
block: BlockTag
|
||||
|
||||
@property
|
||||
@@ -93,7 +93,7 @@ Block = Union[BlockContents, FullBlock]
|
||||
|
||||
BlockSearchResult = TypeVar('BlockSearchResult', BlockContents, FullBlock)
|
||||
|
||||
BlockSearchResultFactory = Callable[[SourceFile, BlockTag], BlockSearchResult]
|
||||
BlockSearchResultFactory = Callable[[AnySourceFile, BlockTag], BlockSearchResult]
|
||||
|
||||
|
||||
class BlockSearcher(Generic[BlockSearchResult], Iterable[BlockSearchResult]):
|
||||
|
||||
@@ -78,7 +78,3 @@ class SnapshotParser(
|
||||
)
|
||||
for block in blocks:
|
||||
self.parse_node(block)
|
||||
# in case there are no snapshots declared, we still want to mark this
|
||||
# file as seen. But after we've finished, because we don't want to add
|
||||
# files with syntax errors
|
||||
self.manifest.get_file(file_block.file)
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import itertools
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Iterable,
|
||||
Dict,
|
||||
Optional,
|
||||
Set,
|
||||
Union,
|
||||
Iterable, Dict, Optional, Set, List, Any
|
||||
)
|
||||
from dbt.adapters.factory import get_adapter
|
||||
from dbt.config import RuntimeConfig
|
||||
from dbt.context.context_config import (
|
||||
BaseContextConfigGenerator,
|
||||
ContextConfigGenerator,
|
||||
UnrenderedConfigGenerator,
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest, SourceKey
|
||||
from dbt.contracts.graph.model_config import SourceConfig
|
||||
from dbt.contracts.graph.parsed import (
|
||||
UnpatchedSourceDefinition,
|
||||
ParsedSourceDefinition,
|
||||
@@ -18,13 +22,24 @@ from dbt.contracts.graph.unparsed import (
|
||||
SourcePatch,
|
||||
SourceTablePatch,
|
||||
UnparsedSourceTableDefinition,
|
||||
FreshnessThreshold,
|
||||
UnparsedColumn,
|
||||
)
|
||||
from dbt.exceptions import warn_or_error
|
||||
from dbt.exceptions import warn_or_error, InternalException
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
from dbt.parser.schemas import SchemaParser, ParserRef
|
||||
from dbt import ui
|
||||
|
||||
|
||||
# An UnparsedSourceDefinition is taken directly from the yaml
|
||||
# file. It can affect multiple tables, all of which will eventually
|
||||
# have their own source node. An UnparsedSourceDefinition will
|
||||
# generate multiple UnpatchedSourceDefinition nodes (one per
|
||||
# table) in the SourceParser.add_source_definitions. The
|
||||
# SourcePatcher takes an UnparsedSourceDefinition and the
|
||||
# SourcePatch and produces a ParsedSourceDefinition. Each
|
||||
# SourcePatch can be applied to multiple UnpatchedSourceDefinitions.
|
||||
class SourcePatcher:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -37,11 +52,50 @@ class SourcePatcher:
|
||||
self.patches_used: Dict[SourceKey, Set[str]] = {}
|
||||
self.sources: Dict[str, ParsedSourceDefinition] = {}
|
||||
|
||||
# This method calls the 'parse_source' method which takes
|
||||
# the UnpatchedSourceDefinitions in the manifest and combines them
|
||||
# with SourcePatches to produce ParsedSourceDefinitions.
|
||||
def construct_sources(self) -> None:
|
||||
for unique_id, unpatched in self.manifest.sources.items():
|
||||
schema_file = self.manifest.files[unpatched.file_id]
|
||||
if isinstance(unpatched, ParsedSourceDefinition):
|
||||
# In partial parsing, there will be ParsedSourceDefinitions
|
||||
# which must be retained.
|
||||
self.sources[unpatched.unique_id] = unpatched
|
||||
continue
|
||||
# returns None if there is no patch
|
||||
patch = self.get_patch_for(unpatched)
|
||||
|
||||
# returns unpatched if there is no patch
|
||||
patched = self.patch_source(unpatched, patch)
|
||||
|
||||
# now use the patched UnpatchedSourceDefinition to extract test data.
|
||||
for test in self.get_source_tests(patched):
|
||||
if test.config.enabled:
|
||||
self.manifest.add_node_nofile(test)
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(test)
|
||||
# save the test unique_id in the schema_file, so we can
|
||||
# process in partial parsing
|
||||
schema_file.tests.append(test.unique_id)
|
||||
|
||||
# Convert UnpatchedSourceDefinition to a ParsedSourceDefinition
|
||||
parsed = self.parse_source(patched)
|
||||
if parsed.config.enabled:
|
||||
self.sources[unique_id] = parsed
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(parsed)
|
||||
|
||||
self.warn_unused()
|
||||
|
||||
def patch_source(
|
||||
self,
|
||||
unpatched: UnpatchedSourceDefinition,
|
||||
patch: Optional[SourcePatch],
|
||||
) -> UnpatchedSourceDefinition:
|
||||
|
||||
# This skips patching if no patch exists because of the
|
||||
# performance overhead of converting to and from dicts
|
||||
if patch is None:
|
||||
return unpatched
|
||||
|
||||
@@ -65,15 +119,83 @@ class SourcePatcher:
|
||||
source=source, table=table, patch_path=patch_path
|
||||
)
|
||||
|
||||
def parse_source_docs(self, block: UnpatchedSourceDefinition) -> ParserRef:
|
||||
refs = ParserRef()
|
||||
for column in block.columns:
|
||||
description = column.description
|
||||
data_type = column.data_type
|
||||
meta = column.meta
|
||||
refs.add(column, description, data_type, meta)
|
||||
return refs
|
||||
# This converts an UnpatchedSourceDefinition to a ParsedSourceDefinition
|
||||
def parse_source(
|
||||
self, target: UnpatchedSourceDefinition
|
||||
) -> ParsedSourceDefinition:
|
||||
source = target.source
|
||||
table = target.table
|
||||
refs = ParserRef.from_target(table)
|
||||
unique_id = target.unique_id
|
||||
description = table.description or ''
|
||||
meta = table.meta or {}
|
||||
source_description = source.description or ''
|
||||
loaded_at_field = table.loaded_at_field or source.loaded_at_field
|
||||
|
||||
freshness = merge_freshness(source.freshness, table.freshness)
|
||||
quoting = source.quoting.merged(table.quoting)
|
||||
# path = block.path.original_file_path
|
||||
source_meta = source.meta or {}
|
||||
|
||||
# make sure we don't do duplicate tags from source + table
|
||||
tags = sorted(set(itertools.chain(source.tags, table.tags)))
|
||||
|
||||
config = self._generate_source_config(
|
||||
fqn=target.fqn,
|
||||
rendered=True,
|
||||
project_name=target.package_name,
|
||||
)
|
||||
|
||||
unrendered_config = self._generate_source_config(
|
||||
fqn=target.fqn,
|
||||
rendered=False,
|
||||
project_name=target.package_name,
|
||||
)
|
||||
|
||||
if not isinstance(config, SourceConfig):
|
||||
raise InternalException(
|
||||
f'Calculated a {type(config)} for a source, but expected '
|
||||
f'a SourceConfig'
|
||||
)
|
||||
|
||||
default_database = self.root_project.credentials.database
|
||||
|
||||
parsed_source = ParsedSourceDefinition(
|
||||
package_name=target.package_name,
|
||||
database=(source.database or default_database),
|
||||
schema=(source.schema or source.name),
|
||||
identifier=(table.identifier or table.name),
|
||||
root_path=target.root_path,
|
||||
path=target.path,
|
||||
original_file_path=target.original_file_path,
|
||||
columns=refs.column_info,
|
||||
unique_id=unique_id,
|
||||
name=table.name,
|
||||
description=description,
|
||||
external=table.external,
|
||||
source_name=source.name,
|
||||
source_description=source_description,
|
||||
source_meta=source_meta,
|
||||
meta=meta,
|
||||
loader=source.loader,
|
||||
loaded_at_field=loaded_at_field,
|
||||
freshness=freshness,
|
||||
quoting=quoting,
|
||||
resource_type=NodeType.Source,
|
||||
fqn=target.fqn,
|
||||
tags=tags,
|
||||
config=config,
|
||||
unrendered_config=unrendered_config,
|
||||
)
|
||||
|
||||
# relation name is added after instantiation because the adapter does
|
||||
# not provide the relation name for a UnpatchedSourceDefinition object
|
||||
parsed_source.relation_name = self._get_relation_name(parsed_source)
|
||||
return parsed_source
|
||||
|
||||
# This code uses the SchemaParser because it shares the '_parse_generic_test'
|
||||
# code. It might be nice to separate out the generic test code
|
||||
# and make it common to the schema parser and source patcher.
|
||||
def get_schema_parser_for(self, package_name: str) -> 'SchemaParser':
|
||||
if package_name in self.schema_parsers:
|
||||
schema_parser = self.schema_parsers[package_name]
|
||||
@@ -89,9 +211,8 @@ class SourcePatcher:
|
||||
def get_source_tests(
|
||||
self, target: UnpatchedSourceDefinition
|
||||
) -> Iterable[ParsedSchemaTestNode]:
|
||||
schema_parser = self.get_schema_parser_for(target.package_name)
|
||||
for test, column in target.get_tests():
|
||||
yield schema_parser.parse_source_test(
|
||||
yield self.parse_source_test(
|
||||
target=target,
|
||||
test=test,
|
||||
column=column,
|
||||
@@ -99,7 +220,7 @@ class SourcePatcher:
|
||||
|
||||
def get_patch_for(
|
||||
self,
|
||||
unpatched: Union[UnpatchedSourceDefinition, ParsedSourceDefinition],
|
||||
unpatched: UnpatchedSourceDefinition,
|
||||
) -> Optional[SourcePatch]:
|
||||
if isinstance(unpatched, ParsedSourceDefinition):
|
||||
return None
|
||||
@@ -114,31 +235,67 @@ class SourcePatcher:
|
||||
self.patches_used[key].add(unpatched.table.name)
|
||||
return patch
|
||||
|
||||
def construct_sources(self) -> None:
|
||||
# given the UnpatchedSourceDefinition and SourcePatches, combine them
|
||||
# to make a beautiful baby ParsedSourceDefinition.
|
||||
for unique_id, unpatched in self.manifest.sources.items():
|
||||
if isinstance(unpatched, ParsedSourceDefinition):
|
||||
continue
|
||||
patch = self.get_patch_for(unpatched)
|
||||
# This calls _parse_generic_test in the SchemaParser
|
||||
def parse_source_test(
|
||||
self,
|
||||
target: UnpatchedSourceDefinition,
|
||||
test: Dict[str, Any],
|
||||
column: Optional[UnparsedColumn],
|
||||
) -> ParsedSchemaTestNode:
|
||||
column_name: Optional[str]
|
||||
if column is None:
|
||||
column_name = None
|
||||
else:
|
||||
column_name = column.name
|
||||
should_quote = (
|
||||
column.quote or
|
||||
(column.quote is None and target.quote_columns)
|
||||
)
|
||||
if should_quote:
|
||||
column_name = get_adapter(self.root_project).quote(column_name)
|
||||
|
||||
patched = self.patch_source(unpatched, patch)
|
||||
# now use the patched UnpatchedSourceDefinition to extract test
|
||||
# data.
|
||||
for test in self.get_source_tests(patched):
|
||||
if test.config.enabled:
|
||||
self.manifest.add_node_nofile(test)
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(test)
|
||||
tags_sources = [target.source.tags, target.table.tags]
|
||||
if column is not None:
|
||||
tags_sources.append(column.tags)
|
||||
tags = list(itertools.chain.from_iterable(tags_sources))
|
||||
|
||||
schema_parser = self.get_schema_parser_for(unpatched.package_name)
|
||||
parsed = schema_parser.parse_source(patched)
|
||||
if parsed.config.enabled:
|
||||
self.sources[unique_id] = parsed
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(parsed)
|
||||
# TODO: make the generic_test code common so we don't need to
|
||||
# create schema parsers to handle the tests
|
||||
schema_parser = self.get_schema_parser_for(target.package_name)
|
||||
node = schema_parser._parse_generic_test(
|
||||
target=target,
|
||||
test=test,
|
||||
tags=tags,
|
||||
column_name=column_name
|
||||
)
|
||||
# we can't go through result.add_node - no file... instead!
|
||||
if node.config.enabled:
|
||||
self.manifest.add_node_nofile(node)
|
||||
else:
|
||||
self.manifest.add_disabled_nofile(node)
|
||||
return node
|
||||
|
||||
self.warn_unused()
|
||||
def _generate_source_config(self, fqn: List[str], rendered: bool, project_name: str):
|
||||
generator: BaseContextConfigGenerator
|
||||
if rendered:
|
||||
generator = ContextConfigGenerator(self.root_project)
|
||||
else:
|
||||
generator = UnrenderedConfigGenerator(
|
||||
self.root_project
|
||||
)
|
||||
|
||||
return generator.calculate_node_config(
|
||||
config_calls=[],
|
||||
fqn=fqn,
|
||||
resource_type=NodeType.Source,
|
||||
project_name=project_name,
|
||||
base=False,
|
||||
)
|
||||
|
||||
def _get_relation_name(self, node: ParsedSourceDefinition):
|
||||
adapter = get_adapter(self.root_project)
|
||||
relation_cls = adapter.Relation
|
||||
return str(relation_cls.create_from(self.root_project, node))
|
||||
|
||||
def warn_unused(self) -> None:
|
||||
unused_tables: Dict[SourceKey, Optional[Set[str]]] = {}
|
||||
@@ -159,6 +316,8 @@ class SourcePatcher:
|
||||
msg = self.get_unused_msg(unused_tables)
|
||||
warn_or_error(msg, log_fmt=ui.warning_tag('{}'))
|
||||
|
||||
self.manifest.source_patches = {}
|
||||
|
||||
def get_unused_msg(
|
||||
self,
|
||||
unused_tables: Dict[SourceKey, Optional[Set[str]]],
|
||||
@@ -184,16 +343,12 @@ class SourcePatcher:
|
||||
        return '\n'.join(msg)


def patch_sources(
    root_project: RuntimeConfig,
    manifest: Manifest,
) -> Dict[str, ParsedSourceDefinition]:
    """Patch all the sources found in the results. Updates results.disabled and
    results.nodes.

    Return a dict of ParsedSourceDefinitions, suitable for use in
    manifest.sources.
    """
    patcher = SourcePatcher(root_project, manifest)
    patcher.construct_sources()
    return patcher.sources
def merge_freshness(
    base: Optional[FreshnessThreshold], update: Optional[FreshnessThreshold]
) -> Optional[FreshnessThreshold]:
    if base is not None and update is not None:
        return base.merged(update)
    elif base is None and update is not None:
        return update
    else:
        return None
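The merge_freshness helper added above combines source-level and table-level freshness, returning None unless the update side is present. A minimal sketch of that merge behavior, using a simplified stand-in for FreshnessThreshold; the Freshness class and its hour fields here are assumptions for illustration only.

# Illustrative only: the None-handling of merge_freshness above with a toy threshold type.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Freshness:
    warn_after_hours: Optional[int] = None
    error_after_hours: Optional[int] = None

    def merged(self, update: 'Freshness') -> 'Freshness':
        # in this sketch, fields set on the update win over the base
        return Freshness(
            warn_after_hours=update.warn_after_hours or self.warn_after_hours,
            error_after_hours=update.error_after_hours or self.error_after_hours,
        )


def merge_freshness(base: Optional[Freshness], update: Optional[Freshness]) -> Optional[Freshness]:
    if base is not None and update is not None:
        return base.merged(update)
    elif base is None and update is not None:
        return update
    return None


source_level = Freshness(warn_after_hours=24)
table_level = Freshness(error_after_hours=48)
print(merge_freshness(source_level, table_level))  # both thresholds populated
print(merge_freshness(source_level, None))         # None when no update side exists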
|
||||
@@ -38,7 +38,7 @@ from dbt.rpc.logger import (
|
||||
QueueTimeoutMessage,
|
||||
)
|
||||
from dbt.rpc.method import RemoteMethod
|
||||
|
||||
from dbt.task.rpc.project_commands import RemoteListTask
|
||||
|
||||
# we use this in typing only...
|
||||
from queue import Queue # noqa
|
||||
@@ -78,7 +78,10 @@ class BootstrapProcess(dbt.flags.MP_CONTEXT.Process):
|
||||
|
||||
def task_exec(self) -> None:
|
||||
"""task_exec runs first inside the child process"""
|
||||
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||
if type(self.task) != RemoteListTask:
|
||||
# TODO: find another solution for this.. in theory it stops us from
|
||||
# being able to kill RemoteListTask processes
|
||||
signal.signal(signal.SIGTERM, sigterm_handler)
|
||||
# the first thing we do in a new process: push logging back over our
|
||||
# queue
|
||||
handler = QueueLogHandler(self.queue)
|
||||
|
||||
@@ -58,6 +58,7 @@ class BaseTask(metaclass=ABCMeta):
|
||||
|
||||
def __init__(self, args, config):
|
||||
self.args = args
|
||||
self.args.single_threaded = False
|
||||
self.config = config
|
||||
|
||||
@classmethod
|
||||
@@ -214,7 +215,7 @@ class BaseRunner(metaclass=ABCMeta):
|
||||
return result
|
||||
|
||||
def _build_run_result(self, node, start_time, status, timing_info, message,
|
||||
agate_table=None, adapter_response=None):
|
||||
agate_table=None, adapter_response=None, failures=None):
|
||||
execution_time = time.time() - start_time
|
||||
thread_id = threading.current_thread().name
|
||||
if adapter_response is None:
|
||||
@@ -227,7 +228,8 @@ class BaseRunner(metaclass=ABCMeta):
|
||||
message=message,
|
||||
node=node,
|
||||
agate_table=agate_table,
|
||||
adapter_response=adapter_response
|
||||
adapter_response=adapter_response,
|
||||
failures=failures
|
||||
)
|
||||
|
||||
def error_result(self, node, message, start_time, timing_info):
|
||||
@@ -256,7 +258,8 @@ class BaseRunner(metaclass=ABCMeta):
|
||||
timing_info=timing_info,
|
||||
message=result.message,
|
||||
agate_table=result.agate_table,
|
||||
adapter_response=result.adapter_response
|
||||
adapter_response=result.adapter_response,
|
||||
failures=result.failures
|
||||
)
|
||||
|
||||
def skip_result(self, node, message):
|
||||
@@ -268,7 +271,8 @@ class BaseRunner(metaclass=ABCMeta):
|
||||
timing=[],
|
||||
message=message,
|
||||
node=node,
|
||||
adapter_response={}
|
||||
adapter_response={},
|
||||
failures=None
|
||||
)
|
||||
|
||||
def compile_and_execute(self, manifest, ctx):
|
||||
|
||||
@@ -24,7 +24,8 @@ class CompileRunner(BaseRunner):
|
||||
thread_id=threading.current_thread().name,
|
||||
execution_time=0,
|
||||
message=None,
|
||||
adapter_response={}
|
||||
adapter_response={},
|
||||
failures=None
|
||||
)
|
||||
|
||||
def compile(self, manifest):
|
||||
|
||||
@@ -59,8 +59,11 @@ class DepsTask(BaseTask):
|
||||
for package in final_deps:
|
||||
logger.info('Installing {}', package)
|
||||
package.install(self.config, renderer)
|
||||
logger.info(' Installed from {}\n',
|
||||
logger.info(' Installed from {}',
|
||||
package.nice_version_name())
|
||||
if package.get_subdirectory():
|
||||
logger.info(' and subdirectory {}\n',
|
||||
package.get_subdirectory())
|
||||
|
||||
self.track_package_install(
|
||||
package_name=package.name,
|
||||
|
||||
@@ -65,7 +65,8 @@ class FreshnessRunner(BaseRunner):
|
||||
timing=timing_info,
|
||||
message=message,
|
||||
node=node,
|
||||
adapter_response={}
|
||||
adapter_response={},
|
||||
failures=None,
|
||||
)
|
||||
|
||||
def from_run_result(self, result, start_time, timing_info):
|
||||
@@ -104,6 +105,7 @@ class FreshnessRunner(BaseRunner):
|
||||
execution_time=0,
|
||||
message=None,
|
||||
adapter_response={},
|
||||
failures=None,
|
||||
**freshness
|
||||
)
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from dbt.adapters.factory import get_adapter
|
||||
from dbt.contracts.graph.compiled import CompileResultNode
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.results import (
|
||||
NodeStatus, TableMetadata, CatalogTable, CatalogResults, Primitive,
|
||||
NodeStatus, TableMetadata, CatalogTable, CatalogResults, PrimitiveDict,
|
||||
CatalogKey, StatsItem, StatsDict, ColumnMetadata, CatalogArtifact
|
||||
)
|
||||
from dbt.exceptions import InternalException
|
||||
@@ -37,9 +37,6 @@ def get_stripped_prefix(source: Dict[str, Any], prefix: str) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
PrimitiveDict = Dict[str, Primitive]
|
||||
|
||||
|
||||
def build_catalog_table(data) -> CatalogTable:
|
||||
# build the new table's metadata + stats
|
||||
metadata = TableMetadata.from_dict(get_stripped_prefix(data, 'table_'))
|
||||
@@ -193,12 +190,6 @@ def get_unique_id_mapping(
|
||||
return node_map, source_map
|
||||
|
||||
|
||||
def _coerce_decimal(value):
|
||||
if isinstance(value, dbt.utils.DECIMALS):
|
||||
return float(value)
|
||||
return value
|
||||
|
||||
|
||||
class GenerateTask(CompileTask):
|
||||
def _get_manifest(self) -> Manifest:
|
||||
if self.manifest is None:
|
||||
@@ -251,7 +242,7 @@ class GenerateTask(CompileTask):
|
||||
catalog_table, exceptions = adapter.get_catalog(self.manifest)
|
||||
|
||||
catalog_data: List[PrimitiveDict] = [
|
||||
dict(zip(catalog_table.column_names, map(_coerce_decimal, row)))
|
||||
dict(zip(catalog_table.column_names, map(dbt.utils._coerce_decimal, row)))
|
||||
for row in catalog_table
|
||||
]
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Type
|
||||
|
||||
from dbt.contracts.graph.parsed import (
|
||||
ParsedExposure,
|
||||
ParsedSourceDefinition,
|
||||
ParsedSourceDefinition
|
||||
)
|
||||
from dbt.graph import (
|
||||
parse_difference,
|
||||
@@ -38,6 +38,8 @@ class ListTask(GraphRunnableTask):
|
||||
'config',
|
||||
'resource_type',
|
||||
'source_name',
|
||||
'original_file_path',
|
||||
'unique_id'
|
||||
))
|
||||
|
||||
def __init__(self, args, config):
|
||||
@@ -120,7 +122,7 @@ class ListTask(GraphRunnableTask):
|
||||
|
||||
def run(self):
|
||||
ManifestTask._runtime_initialize(self)
|
||||
output = self.config.args.output
|
||||
output = self.args.output
|
||||
if output == 'selector':
|
||||
generator = self.generate_selectors
|
||||
elif output == 'name':
|
||||
@@ -133,7 +135,11 @@ class ListTask(GraphRunnableTask):
|
||||
raise InternalException(
|
||||
'Invalid output {}'.format(output)
|
||||
)
|
||||
for result in generator():
|
||||
|
||||
return self.output_results(generator())
|
||||
|
||||
def output_results(self, results):
|
||||
for result in results:
|
||||
self.node_results.append(result)
|
||||
print(result)
|
||||
return self.node_results
|
||||
@@ -143,10 +149,10 @@ class ListTask(GraphRunnableTask):
|
||||
if self.args.models:
|
||||
return [NodeType.Model]
|
||||
|
||||
values = set(self.config.args.resource_types)
|
||||
if not values:
|
||||
if not self.args.resource_types:
|
||||
return list(self.DEFAULT_RESOURCE_VALUES)
|
||||
|
||||
values = set(self.args.resource_types)
|
||||
if 'default' in values:
|
||||
values.remove('default')
|
||||
values.update(self.DEFAULT_RESOURCE_VALUES)
|
||||
|
||||
@@ -61,12 +61,8 @@ class ParseTask(ConfiguredTask):
|
||||
print_timestamped_line("Dependencies loaded")
|
||||
loader = ManifestLoader(root_config, projects, macro_hook)
|
||||
print_timestamped_line("ManifestLoader created")
|
||||
loader.load()
|
||||
manifest = loader.load()
|
||||
print_timestamped_line("Manifest loaded")
|
||||
loader.write_manifest_for_partial_parse()
|
||||
print_timestamped_line("Manifest for partial parse saved")
|
||||
manifest = loader.update_manifest()
|
||||
print_timestamped_line("Manifest updated")
|
||||
_check_manifest(manifest, root_config)
|
||||
print_timestamped_line("Manifest checked")
|
||||
manifest.build_flat_graph()
|
||||
@@ -93,5 +89,6 @@ class ParseTask(ConfiguredTask):
|
||||
if self.args.write_manifest:
|
||||
print_timestamped_line('Writing manifest.')
|
||||
self.write_manifest()
|
||||
|
||||
self.write_perf_info()
|
||||
print_timestamped_line('Done.')
|
||||
|
||||
@@ -12,7 +12,7 @@ from dbt import ui
|
||||
from dbt import utils
|
||||
|
||||
from dbt.contracts.results import (
|
||||
FreshnessStatus, NodeResult, NodeStatus, TestStatus
|
||||
FreshnessStatus, NodeStatus, TestStatus
|
||||
)
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ def get_printable_result(
|
||||
|
||||
|
||||
def print_test_result_line(
|
||||
result: NodeResult, schema_name, index: int, total: int
|
||||
result, index: int, total: int
|
||||
) -> None:
|
||||
model = result.node
|
||||
|
||||
@@ -128,11 +128,11 @@ def print_test_result_line(
|
||||
color = ui.green
|
||||
logger_fn = logger.info
|
||||
elif result.status == TestStatus.Warn:
|
||||
info = 'WARN {}'.format(result.message)
|
||||
info = f'WARN {result.failures}'
|
||||
color = ui.yellow
|
||||
logger_fn = logger.warning
|
||||
elif result.status == TestStatus.Fail:
|
||||
info = 'FAIL {}'.format(result.message)
|
||||
info = f'FAIL {result.failures}'
|
||||
color = ui.red
|
||||
logger_fn = logger.error
|
||||
else:
|
||||
@@ -291,20 +291,23 @@ def print_run_result_error(
|
||||
result.node.name,
|
||||
result.node.original_file_path))
|
||||
|
||||
try:
|
||||
# if message is int, must be rows returned for a test
|
||||
int(result.message)
|
||||
except ValueError:
|
||||
logger.error(" Status: {}".format(result.status))
|
||||
if result.message:
|
||||
logger.error(f" {result.message}")
|
||||
else:
|
||||
num_rows = utils.pluralize(result.message, 'result')
|
||||
logger.error(" Got {}, expected 0.".format(num_rows))
|
||||
logger.error(f" Status: {result.status}")
|
||||
|
||||
if result.node.build_path is not None:
|
||||
with TextOnly():
|
||||
logger.info("")
|
||||
logger.info(" compiled SQL at {}".format(
|
||||
result.node.build_path))
|
||||
result.node.compiled_path))
|
||||
|
||||
if result.node.should_store_failures:
|
||||
with TextOnly():
|
||||
logger.info("")
|
||||
msg = f"select * from {result.node.relation_name}"
|
||||
border = '-' * len(msg)
|
||||
logger.info(f" See test failures:\n {border}\n {msg}\n {border}")
|
||||
|
||||
elif result.message is not None:
|
||||
first = True
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
@@ -15,10 +16,13 @@ from dbt.contracts.rpc import (
|
||||
RPCTestParameters,
|
||||
RemoteCatalogResults,
|
||||
RemoteExecutionResult,
|
||||
RemoteListResults,
|
||||
RemoteRunOperationResult,
|
||||
RPCSnapshotParameters,
|
||||
RPCSourceFreshnessParameters,
|
||||
RPCListParameters,
|
||||
)
|
||||
from dbt.exceptions import RuntimeException
|
||||
from dbt.rpc.method import (
|
||||
Parameters, RemoteManifestMethod
|
||||
)
|
||||
@@ -32,6 +36,7 @@ from dbt.task.run_operation import RunOperationTask
|
||||
from dbt.task.seed import SeedTask
|
||||
from dbt.task.snapshot import SnapshotTask
|
||||
from dbt.task.test import TestTask
|
||||
from dbt.task.list import ListTask
|
||||
|
||||
from .base import RPCTask
|
||||
from .cli import HasCLI
|
||||
@@ -258,3 +263,36 @@ class GetManifest(
|
||||
|
||||
def interpret_results(self, results):
|
||||
return results.manifest is not None
|
||||
|
||||
|
||||
class RemoteListTask(
|
||||
RPCCommandTask[RPCListParameters], ListTask
|
||||
):
|
||||
METHOD_NAME = 'list'
|
||||
|
||||
def set_args(self, params: RPCListParameters) -> None:
|
||||
|
||||
self.args.output = params.output
|
||||
self.args.resource_types = self._listify(params.resource_types)
|
||||
self.args.models = self._listify(params.models)
|
||||
self.args.exclude = self._listify(params.exclude)
|
||||
self.args.selector_name = params.selector
|
||||
self.args.select = self._listify(params.select)
|
||||
|
||||
if self.args.models:
|
||||
if self.args.select:
|
||||
raise RuntimeException(
|
||||
'"models" and "select" are mutually exclusive arguments'
|
||||
)
|
||||
if self.args.resource_types:
|
||||
raise RuntimeException(
|
||||
'"models" and "resource_type" are mutually exclusive '
|
||||
'arguments'
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def output_results(results):
|
||||
return RemoteListResults(
|
||||
output=[json.loads(x) for x in results],
|
||||
logs=None
|
||||
)
|
||||
|
||||
@@ -201,7 +201,8 @@ class ModelRunner(CompileRunner):
|
||||
thread_id=threading.current_thread().name,
|
||||
execution_time=0,
|
||||
message=str(result.response),
|
||||
adapter_response=adapter_response
|
||||
adapter_response=adapter_response,
|
||||
failures=result.get('failures')
|
||||
)
|
||||
|
||||
def _materialization_relations(
|
||||
|
||||
@@ -413,7 +413,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
if len(self._flattened_nodes) == 0:
|
||||
logger.warning("WARNING: Nothing to do. Try checking your model "
|
||||
"configs and model specification args")
|
||||
return self.get_result(
|
||||
result = self.get_result(
|
||||
results=[],
|
||||
generated_at=datetime.utcnow(),
|
||||
elapsed_time=0.0,
|
||||
@@ -421,9 +421,8 @@ class GraphRunnableTask(ManifestTask):
|
||||
else:
|
||||
with TextOnly():
|
||||
logger.info("")
|
||||
|
||||
selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
|
||||
result = self.execute_with_hooks(selected_uids)
|
||||
selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
|
||||
result = self.execute_with_hooks(selected_uids)
|
||||
|
||||
if flags.WRITE_JSON:
|
||||
self.write_manifest()
|
||||
@@ -456,7 +455,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
for node in self.manifest.nodes.values():
|
||||
if node.unique_id not in selected_uids:
|
||||
continue
|
||||
if node.is_refable and not node.is_ephemeral:
|
||||
if node.is_relational and not node.is_ephemeral:
|
||||
relation = adapter.Relation.create_from(self.config, node)
|
||||
result.add(relation.without_identifier())
|
||||
|
||||
@@ -526,7 +525,6 @@ class GraphRunnableTask(ManifestTask):
|
||||
db_schema = (db_lower, schema.lower())
|
||||
if db_schema not in existing_schemas_lowered:
|
||||
existing_schemas_lowered.add(db_schema)
|
||||
|
||||
fut = tpe.submit_connected(
|
||||
adapter, f'create_{info.database or ""}_{info.schema}',
|
||||
create_schema, info
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
from dataclasses import dataclass
|
||||
from dbt import utils
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
import threading
|
||||
from typing import Dict, Any, Set
|
||||
from typing import Dict, Any, Union
|
||||
|
||||
from .compile import CompileRunner
from .run import RunTask
@@ -11,67 +14,49 @@ from dbt.contracts.graph.compiled import (
    CompiledTestNode,
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.parsed import (
    ParsedDataTestNode,
    ParsedSchemaTestNode,
)
from dbt.contracts.results import RunResult, TestStatus
from dbt.contracts.results import TestStatus, PrimitiveDict, RunResult
from dbt.context.providers import generate_runtime_model
from dbt.clients.jinja import MacroGenerator
from dbt.exceptions import (
    raise_compiler_error,
    InternalException,
    missing_materialization
)
from dbt.graph import (
    ResourceTypeSelector,
    SelectionSpec,
    UniqueId,
    parse_test_selectors,
)
from dbt.node_types import NodeType, RunHookType
from dbt import flags


@dataclass
class TestResultData(dbtClassMixin):
    failures: int
    should_warn: bool
    should_error: bool


class TestRunner(CompileRunner):
    def describe_node(self):
        node_name = self.node.name
        return "test {}".format(node_name)

    def print_result_line(self, result):
        schema_name = self.node.schema
        print_test_result_line(result, schema_name, self.node_index,
                               self.num_nodes)
        print_test_result_line(result, self.node_index, self.num_nodes)

    def print_start_line(self):
        description = self.describe_node()
        print_start_line(description, self.node_index, self.num_nodes)

    def execute_schema_test(self, test: CompiledSchemaTestNode):
        _, table = self.adapter.execute(
            test.compiled_sql,
            auto_begin=True,
            fetch=True,
        )

        num_rows = len(table.rows)
        if num_rows != 1:
            num_cols = len(table.columns)
            raise_compiler_error(
                f"Bad test {test.test_metadata.name}: "
                f"Returned {num_rows} rows and {num_cols} cols, but expected "
                f"1 row and 1 column"
            )
        return table[0][0]

    def before_execute(self):
        self.print_start_line()

    def execute_data_test(
    def execute_test(
        self,
        test: CompiledDataTestNode,
        test: Union[CompiledDataTestNode, CompiledSchemaTestNode],
        manifest: Manifest
    ) -> int:
    ) -> TestResultData:
        context = generate_runtime_model(
            test, self.config, manifest
        )
@@ -79,7 +64,8 @@ class TestRunner(CompileRunner):
        materialization_macro = manifest.find_materialization_macro_by_name(
            self.config.project_name,
            test.get_materialization(),
            self.adapter.type())
            self.adapter.type()
        )

        if materialization_macro is None:
            missing_materialization(test, self.adapter.type())
@@ -91,7 +77,6 @@ class TestRunner(CompileRunner):
            )

        # generate materialization macro
        # simple `select(*)` of the compiled test node
        macro_func = MacroGenerator(materialization_macro, context)
        # execute materialization macro
        macro_func()
@@ -101,36 +86,51 @@ class TestRunner(CompileRunner):
        table = result['table']
        num_rows = len(table.rows)
        if num_rows != 1:
            num_cols = len(table.columns)
            # since we just wrapped our query in `select count(*)`, we are in
            # big trouble!
            raise InternalException(
                f"dbt internally failed to execute {test.unique_id}: "
                f"Returned {num_rows} rows and {num_cols} cols, but expected "
                f"1 row and 1 column"
                f"Returned {num_rows} rows, but expected "
                f"1 row"
            )
        return int(table[0][0])
        num_cols = len(table.columns)
        if num_cols != 3:
            raise InternalException(
                f"dbt internally failed to execute {test.unique_id}: "
                f"Returned {num_cols} columns, but expected "
                f"3 columns"
            )

        test_result_dct: PrimitiveDict = dict(
            zip(
                [column_name.lower() for column_name in table.column_names],
                map(utils._coerce_decimal, table.rows[0])
            )
        )
        TestResultData.validate(test_result_dct)
        return TestResultData.from_dict(test_result_dct)

    def execute(self, test: CompiledTestNode, manifest: Manifest):
        if isinstance(test, CompiledDataTestNode):
            failed_rows = self.execute_data_test(test, manifest)
        elif isinstance(test, CompiledSchemaTestNode):
            failed_rows = self.execute_schema_test(test)
        else:
            raise InternalException(
                f'Expected compiled schema test or compiled data test, got '
                f'{type(test)}'
            )
        result = self.execute_test(test, manifest)

        severity = test.config.severity.upper()
        thread_id = threading.current_thread().name
        num_errors = utils.pluralize(result.failures, 'result')
        status = None
        if failed_rows == 0:
            status = TestStatus.Pass
        elif severity == 'ERROR' or flags.WARN_ERROR:
        message = None
        failures = 0
        if severity == "ERROR" and result.should_error:
            status = TestStatus.Fail
            message = f'Got {num_errors}, configured to fail if {test.config.error_if}'
            failures = result.failures
        elif result.should_warn:
            if flags.WARN_ERROR:
                status = TestStatus.Fail
                message = f'Got {num_errors}, configured to fail if {test.config.warn_if}'
            else:
                status = TestStatus.Warn
                message = f'Got {num_errors}, configured to warn if {test.config.warn_if}'
            failures = result.failures
        else:
            status = TestStatus.Warn
            status = TestStatus.Pass

        return RunResult(
            node=test,
@@ -138,18 +138,15 @@ class TestRunner(CompileRunner):
            timing=[],
            thread_id=thread_id,
            execution_time=0,
            message=int(failed_rows),
            adapter_response={}
            message=message,
            adapter_response={},
            failures=failures,
        )

    def after_execute(self, result):
        self.print_result_line(result)


DATA_TEST_TYPES = (CompiledDataTestNode, ParsedDataTestNode)
SCHEMA_TEST_TYPES = (CompiledSchemaTestNode, ParsedSchemaTestNode)


class TestSelector(ResourceTypeSelector):
    def __init__(self, graph, manifest, previous_state):
        super().__init__(
@@ -159,17 +156,6 @@ class TestSelector(ResourceTypeSelector):
            resource_types=[NodeType.Test],
        )

    def expand_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
        # exposures can't have tests, so this is relatively easy
        selected_tests = set()
        for unique_id in self.graph.select_successors(selected):
            if unique_id in self.manifest.nodes:
                node = self.manifest.nodes[unique_id]
                if node.resource_type == NodeType.Test:
                    selected_tests.add(unique_id)

        return selected | selected_tests


class TestTask(RunTask):
    """
core/dbt/tree_sitter_jinja/README.md (Normal file, 38 lines)
@@ -0,0 +1,38 @@
# tree_sitter_jinja Module

This module contains a tool that processes the most common jinja value templates in dbt model files. The tool uses `tree-sitter-jinja2` and the python bindings for tree-sitter as dependencies.

# Strategy

The current strategy is for this processor to be 100% certain when it can accurately extract values from a given model file. Anything less than 100% certainty returns an exception so that the model can be rendered with python Jinja instead.

There are two cases we want to avoid because they would risk the correctness of users' projects:
1. Confidently extracting values that would not be extracted by python jinja (false positives)
2. Confidently extracting a set of values that does not include values python jinja would have extracted (misses)

If we instead error when we could have confidently extracted values, there is no correctness risk to the user, only an opportunity to expand the rules to encompass this class of cases as well.

Even though dbt's usage of jinja is not typed, the type checker statically determines whether or not the current implementation can confidently extract values without relying on python jinja rendering, which is when these errors would otherwise surface. This type checker will become more permissive over time as this tool expands to include more dbt and jinja features.
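
To make the fail-closed behavior concrete, here is a minimal sketch using the `extract_from_source` entry point defined in `core/dbt/tree_sitter_jinja/extractor.py` (added later in this diff). The import path is inferred from the file location, the inputs are made up, and the exact results depend on how the `tree-sitter-jinja2` grammar treats the raw SQL text around the expressions:

```python
from dbt.tree_sitter_jinja.extractor import extract_from_source

# A template made only of simple value expressions can be extracted statically.
simple = "{{ config(materialized='table') }} select * from {{ ref('my_model') }}"
print(extract_from_source(simple))
# e.g. {'refs': [['my_model']], 'sources': set(),
#       'configs': [('materialized', 'table')], 'python_jinja': False}

# Anything the type checker is not 100% sure about (here, a jinja statement
# block) is not guessed at; the tool reports python_jinja=True so the model
# falls back to full python Jinja rendering instead of risking a wrong answer.
dynamic = "{% set n = 3 %} select * from {{ ref('my_model') }}"
print(extract_from_source(dynamic))
# {'refs': [], 'sources': set(), 'configs': [], 'python_jinja': True}
```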

# Architecture

This architecture is optimized for value extraction and for future flexibility. It is expected to change, and is coded in fp-style stages to make those changes easier in the future.

This processor is composed of several stages:
1. parser
2. type checker
3. extractor

The parser is generated by tree-sitter from the package `tree-sitter-jinja2`. The python hooks are used to traverse the concrete syntax tree that tree-sitter makes in order to create a typed abstract syntax tree in the type checking stage (in Python, we have chosen to represent this with a nested tuple of strings). The errors in the type checking stage are not raised to the user, and are instead used by developers to debug tests.

The parser is solely responsible for turning text into recognized values, while the type checker does arity checking and enforces argument list types (e.g. nested function calls like `{{ config(my_ref=ref('table')) }}` will parse but not type check even though it is valid dbt syntax. The tool at this time doesn't have an agreed serialization to communicate refs as config values, but could in the future.)

The extractor uses the typed abstract syntax tree to easily identify all the refs, sources, and configs present and extract them to a dictionary, as sketched below.
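
For intuition about what the stages hand to each other, here is a rough sketch of the nested-tuple representation and the dictionary the extractor builds from it, based on the `_to_typed` and `extract` functions in `extractor.py` below. The specific template and the exact tuples it would produce are illustrative assumptions, not guaranteed parser output:

```python
# Typed AST the type checker might produce for:
# "{{ config(materialized='view', tags=['nightly']) }} {{ source('raw', 'orders') }}"
typed_root = (
    'root',
    ('config',
        ('kwarg', 'materialized', 'view'),
        ('kwarg', 'tags', ('list', 'nightly'))),
    ('source', 'raw', 'orders'),
)

# The extractor walks that tuple and accumulates plain python values;
# extract(typed_root) would then return something like:
expected = {
    'refs': [],
    'sources': {('raw', 'orders')},
    'configs': [('materialized', 'view'), ('tags', ['nightly'])],
    'python_jinja': False,
}
```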

## Tests

- Tests are in `test/unit/test_tree_sitter_jinja.py` and run with dbt unit tests

## Future

- This module will eventually be rewritten in Rust for the added type safety
core/dbt/tree_sitter_jinja/__init__.py (Normal file, 0 lines)

core/dbt/tree_sitter_jinja/extractor.py (Normal file, 292 lines)
@@ -0,0 +1,292 @@
from dataclasses import dataclass
from functools import reduce
from itertools import dropwhile
from tree_sitter import Parser  # type: ignore
from tree_sitter_jinja2 import JINJA2_LANGUAGE  # type: ignore


# global values
parser = Parser()
parser.set_language(JINJA2_LANGUAGE)


@dataclass
class ParseFailure(Exception):
    msg: str


@dataclass
class TypeCheckFailure(Exception):
    msg: str


def named_children(node):
    return list(filter(lambda x: x.is_named, node.children))


def text_from_node(source_bytes, node):
    return source_bytes[node.start_byte:node.end_byte].decode('utf8')


def strip_quotes(text):
    if text:
        return text[1:-1]


# flatten([[1,2],[3,4]]) = [1,2,3,4]
def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]


def has_kwarg_child_named(name_list, node):
    kwargs = node[1:]
    for kwarg in kwargs:
        if kwarg[1] in name_list:
            return True
    return False


# if all positional args come before kwargs return True.
# otherwise return False.
def kwargs_last(args):
    def not_kwarg(node):
        return node.type != 'kwarg'

    no_leading_positional_args = dropwhile(not_kwarg, args)
    dangling_positional_args = filter(not_kwarg, no_leading_positional_args)
    return len(list(dangling_positional_args)) == 0


def error_count(node):
    if node.has_error:
        return 1

    if node.children:
        return reduce(lambda a, b: a + b, map(lambda x: error_count(x), node.children))
    else:
        return 0


# meat of the type checker
# throws a TypeCheckFailure or returns a typed ast in the form of a nested tuple
def _to_typed(source_bytes, node):
    if node.type == 'lit_string':
        return strip_quotes(text_from_node(source_bytes, node))

    if node.type == 'bool':
        text = text_from_node(source_bytes, node)
        if text == 'True':
            return True
        if text == 'False':
            return False

    if node.type == 'jinja_expression':
        raise TypeCheckFailure("jinja expressions are unsupported: {% syntax like this %}")

    elif node.type == 'list':
        elems = named_children(node)
        for elem in elems:
            if elem.type == 'fn_call':
                raise TypeCheckFailure("list elements cannot be function calls")
        return ('list', *(_to_typed(source_bytes, elem) for elem in elems))

    elif node.type == 'kwarg':
        value_node = node.child_by_field_name('value')
        if value_node.type == 'fn_call':
            raise TypeCheckFailure("keyword arguments can not be function calls")
        key_node = node.child_by_field_name('key')
        key_text = text_from_node(source_bytes, key_node)
        return ('kwarg', key_text, _to_typed(source_bytes, value_node))

    elif node.type == 'dict':
        # locally mutate list of kv pairs
        pairs = []
        for pair in named_children(node):
            key = pair.child_by_field_name('key')
            value = pair.child_by_field_name('value')
            if key.type != 'lit_string':
                raise TypeCheckFailure("all dict keys must be string literals")
            if value.type == 'fn_call':
                raise TypeCheckFailure("dict values cannot be function calls")
            pairs.append((key, value))
        return (
            'dict',
            *(
                (
                    strip_quotes(text_from_node(source_bytes, pair[0])),
                    _to_typed(source_bytes, pair[1])
                ) for pair in pairs
            ))

    elif node.type == 'source_file':
        children = named_children(node)
        return ('root', *(_to_typed(source_bytes, child) for child in children))

    elif node.type == 'fn_call':
        name = text_from_node(source_bytes, node.child_by_field_name('fn_name'))
        arg_list = node.child_by_field_name('argument_list')
        arg_count = arg_list.named_child_count
        args = named_children(arg_list)
        if not kwargs_last(args):
            raise TypeCheckFailure("keyword arguments must all be at the end")

        if name == 'ref':
            if arg_count != 1 and arg_count != 2:
                raise TypeCheckFailure(f"expected ref to have 1 or 2 arguments. found {arg_count}")
            for arg in args:
                if arg.type != 'lit_string':
                    raise TypeCheckFailure(f"all ref arguments must be strings. found {arg.type}")
            return ('ref', *(_to_typed(source_bytes, arg) for arg in args))

        elif name == 'source':
            if arg_count != 2:
                raise TypeCheckFailure(f"expected source to have 2 arguments. found {arg_count}")
            for arg in args:
                if arg.type != 'kwarg' and arg.type != 'lit_string':
                    raise TypeCheckFailure(f"unexpected argument type in source. Found {arg.type}")
            # note: keyword vs positional argument order is checked above in fn_call checks
            if args[0].type == 'kwarg':
                key_name = text_from_node(source_bytes, args[0].child_by_field_name('key'))
                if key_name != 'source_name':
                    raise TypeCheckFailure(
                        "first keyword argument in source must be source_name, found "
                        f"{args[0].child_by_field_name('key')}"
                    )
            if args[1].type == 'kwarg':
                key_name = text_from_node(source_bytes, args[1].child_by_field_name('key'))
                if key_name != 'table_name':
                    raise TypeCheckFailure(
                        "second keyword argument in source must be table_name, found "
                        f"{args[1].child_by_field_name('key')}"
                    )

            # restructure source calls to look like they
            # were all called positionally for uniformity
            source_name = args[0]
            table_name = args[1]
            if args[0].type == 'kwarg':
                source_name = args[0].child_by_field_name('value')
            if args[1].type == 'kwarg':
                table_name = args[1].child_by_field_name('value')

            return (
                'source',
                _to_typed(source_bytes, source_name),
                _to_typed(source_bytes, table_name)
            )

        elif name == 'config':
            if arg_count < 1:
                raise TypeCheckFailure(
                    f"expected config to have at least one argument. found {arg_count}"
                )
            excluded_config_args = ['post-hook', 'post_hook', 'pre-hook', 'pre_hook']
            for arg in args:
                if arg.type != 'kwarg':
                    raise TypeCheckFailure(
                        f"unexpected non keyword argument in config. found {arg.type}"
                    )
                key_name = text_from_node(source_bytes, arg.child_by_field_name('key'))
                if key_name in excluded_config_args:
                    raise TypeCheckFailure(f"excluded config kwarg found: {key_name}")
            return ('config', *(_to_typed(source_bytes, arg) for arg in args))

        else:
            raise TypeCheckFailure(f"unexpected function call to {name}")

    else:
        raise TypeCheckFailure(f"unexpected node type: {node.type}")


# Entry point for type checking. Either returns a single TypeCheckFailure or
# a typed-ast in the form of nested tuples.
# Depends on the source because we check for built-ins. It's a bit of a hack,
# but it works well at this scale.
def type_check(source_bytes, node):
    try:
        return _to_typed(source_bytes, node)
    # if an error was thrown, return it instead.
    except TypeCheckFailure as e:
        return e


# operates on a typed ast
def _extract(node, data):
    # reached a leaf
    if not isinstance(node, tuple):
        return node

    if node[0] == 'list':
        return list(_extract(child, data) for child in node[1:])

    if node[0] == 'dict':
        return {pair[0]: _extract(pair[1], data) for pair in node[1:]}

    if node[0] == 'ref':
        # no package name
        if len(node) == 2:
            ref = [node[1]]
        else:
            ref = [node[1], node[2]]
        data['refs'].append(ref)

    # configs are the only ones that can recurse like this
    # e.g. {{ config(key=[{'nested':'values'}]) }}
    if node[0] == 'config':
        for kwarg in node[1:]:
            data['configs'].append((kwarg[1], _extract(kwarg[2], data)))

    if node[0] == 'source':
        for arg in node[1:]:
            data['sources'].add((node[1], node[2]))

    # generator statement evaluated as tuple for effects
    tuple(_extract(child, data) for child in node[1:])


def extract(node):
    data = {
        'refs': [],
        'sources': set(),
        'configs': [],
        'python_jinja': False
    }
    _extract(node, data)
    return data


# returns a fully processed, typed ast or an exception
def process_source(parser, string):
    source_bytes = bytes(string, "utf8")
    tree = parser.parse(source_bytes)
    count = error_count(tree.root_node)

    # check for parser errors
    if count > 0:
        return ParseFailure("tree-sitter found errors")

    # if there are no parsing errors check for type errors
    checked_ast_or_error = type_check(source_bytes, tree.root_node)
    if isinstance(checked_ast_or_error, TypeCheckFailure):
        err = checked_ast_or_error
        return err

    # if there are no parsing errors and no type errors, return the typed ast
    typed_root = checked_ast_or_error
    return typed_root


# entry point function
def extract_from_source(string):
    res = process_source(parser, string)

    if isinstance(res, Exception):
        return {
            'refs': [],
            'sources': set(),
            'configs': [],
            'python_jinja': True
        }

    typed_root = res
    return extract(typed_root)
@@ -404,6 +404,8 @@ def pluralize(count, string: Union[str, NodeType]):
    return f'{count} {pluralized}'


# Note that this only affects hologram json validation.
# It has no effect on mashumaro serialization.
def restrict_to(*restrictions):
    """Create the metadata for a restricted dataclass field"""
    return {'restrict': list(restrictions)}
@@ -420,6 +422,12 @@ def coerce_dict_str(value: Any) -> Optional[Dict[str, Any]]:
    return None


def _coerce_decimal(value):
    if isinstance(value, DECIMALS):
        return float(value)
    return value


def lowercase(value: Optional[str]) -> Optional[str]:
    if value is None:
        return None

@@ -96,5 +96,5 @@ def _get_dbt_plugins_info():
        yield plugin_name, mod.version


__version__ = '0.19.0'
__version__ = '0.20.0rc1'
installed = get_installed_version()

@@ -24,7 +24,7 @@ def read(fname):


package_name = "dbt-core"
package_version = "0.19.0"
package_version = "0.20.0rc1"
description = """dbt (data build tool) is a command line tool that helps \
analysts and engineers transform data in their warehouse more effectively"""

@@ -59,21 +59,24 @@ setup(
        'scripts/dbt',
    ],
    install_requires=[
        'Jinja2==2.11.2',
        'Jinja2==2.11.3',
        'PyYAML>=3.11',
        'sqlparse>=0.2.3,<0.4',
        'networkx>=2.3,<3',
        'minimal-snowplow-tracker==0.0.2',
        'colorama>=0.3.9,<0.4.5',
        'agate>=1.6,<1.6.2',
        'isodate>=0.6,<0.7',
        'json-rpc>=1.12,<2',
        'werkzeug>=0.15,<2.0',
        'colorama>=0.3.9,<0.4.5',
        'dataclasses>=0.6,<0.9;python_version<"3.7"',
        'hologram==0.0.14',
        'isodate>=0.6,<0.7',
        'json-rpc>=1.12,<2',
        'logbook>=1.5,<1.6',
        'mashumaro==2.0',
        'mashumaro==2.5',
        'minimal-snowplow-tracker==0.0.2',
        'networkx>=2.3,<3',
        'packaging~=20.9',
        'sqlparse>=0.2.3,<0.4',
        'tree-sitter==0.19.0',
        'tree-sitter-jinja2==0.1.0a1',
        'typing-extensions>=3.7.4,<3.8',
        'werkzeug>=0.15,<2.0',
        # the following are all to match snowflake-connector-python
        'requests<3.0.0',
        'idna>=2.5,<3',

@@ -4,10 +4,15 @@ FROM $BASE_IMAGE
ARG BASE_REQUIREMENTS_SRC_PATH
ARG WHEEL_REQUIREMENTS_SRC_PATH
ARG DIST_PATH
RUN apt-get update \

# We need backport packages to get a more recent version of git
RUN printf "deb http://deb.debian.org/debian buster-backports main" > /etc/apt/sources.list.d/backports_git.list && \
    apt-get update \
    && apt-get dist-upgrade -y \
    && apt-get install -y --no-install-recommends \
    git \
    git-man/buster-backports \
    git/buster-backports \
    ssh-client \
    software-properties-common \
    make \
    build-essential \

docker/requirements/requirements.0.20.0b1.txt (Normal file, 75 lines)
@@ -0,0 +1,75 @@
agate==1.6.1
asn1crypto==1.4.0
attrs==20.3.0
azure-common==1.1.27
azure-core==1.13.0
azure-storage-blob==12.8.1
Babel==2.9.1
boto3==1.17.62
botocore==1.20.62
cachetools==4.2.2
certifi==2020.12.5
cffi==1.14.5
chardet==3.0.4
colorama==0.4.4
cryptography==3.4.7
decorator==4.4.2
google-api-core==1.26.3
google-auth==1.30.0
google-cloud-bigquery==2.15.0
google-cloud-core==1.6.0
google-crc32c==1.1.2
google-resumable-media==1.2.0
googleapis-common-protos==1.53.0
grpcio==1.37.1
hologram==0.0.14
idna==2.10
importlib-metadata==4.0.1
isodate==0.6.0
jeepney==0.6.0
Jinja2==2.11.2
jmespath==0.10.0
json-rpc==1.13.0
jsonschema==3.1.1
keyring==21.8.0
leather==0.3.3
Logbook==1.5.3
MarkupSafe==1.1.1
mashumaro==2.0
minimal-snowplow-tracker==0.0.2
msgpack==1.0.2
msrest==0.6.21
networkx==2.5.1
oauthlib==3.1.0
oscrypto==1.2.1
packaging==20.9
parsedatetime==2.6
proto-plus==1.18.1
protobuf==3.15.8
psycopg2-binary==2.8.6
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
pycryptodomex==3.10.1
PyJWT==2.1.0
pyOpenSSL==20.0.1
pyparsing==2.4.7
pyrsistent==0.17.3
python-dateutil==2.8.1
python-slugify==5.0.0
pytimeparse==1.1.8
pytz==2021.1
PyYAML==5.4.1
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
s3transfer==0.4.2
SecretStorage==3.3.1
six==1.15.0
snowflake-connector-python==2.4.3
sqlparse==0.3.1
text-unidecode==1.3
typing-extensions==3.7.4.3
urllib3==1.26.4
Werkzeug==1.0.1
zipp==3.4.1
docker/requirements/requirements.0.20.0rc1.txt (Normal file, 78 lines)
@@ -0,0 +1,78 @@
agate==1.6.1
asn1crypto==1.4.0
attrs==21.2.0
azure-common==1.1.27
azure-core==1.15.0
azure-storage-blob==12.8.1
Babel==2.9.1
boto3==1.17.87
botocore==1.20.87
cachetools==4.2.2
certifi==2020.12.5
cffi==1.14.5
chardet==3.0.4
colorama==0.4.4
cryptography==3.4.7
decorator==4.4.2
google-api-core==1.29.0
google-auth==1.30.1
google-cloud-bigquery==2.18.0
google-cloud-core==1.6.0
google-crc32c==1.1.2
google-resumable-media==1.3.0
googleapis-common-protos==1.53.0
grpcio==1.38.0
hologram==0.0.14
idna==2.10
importlib-metadata==4.5.0
isodate==0.6.0
jeepney==0.6.0
Jinja2==2.11.3
jmespath==0.10.0
json-rpc==1.13.0
jsonschema==3.1.1
keyring==21.8.0
leather==0.3.3
Logbook==1.5.3
MarkupSafe==2.0.1
mashumaro==2.5
minimal-snowplow-tracker==0.0.2
msgpack==1.0.2
msrest==0.6.21
networkx==2.5.1
oauthlib==3.1.1
oscrypto==1.2.1
packaging==20.9
parsedatetime==2.6
proto-plus==1.18.1
protobuf==3.17.2
psycopg2-binary==2.8.6
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
pycryptodomex==3.10.1
PyJWT==2.1.0
pyOpenSSL==20.0.1
pyparsing==2.4.7
pyrsistent==0.17.3
python-dateutil==2.8.1
python-slugify==5.0.2
pytimeparse==1.1.8
pytz==2021.1
PyYAML==5.4.1
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7.2
s3transfer==0.4.2
SecretStorage==3.3.1
setuptools-dso==2.0a1
six==1.16.0
snowflake-connector-python==2.4.4
sqlparse==0.3.1
text-unidecode==1.3
tree-sitter==0.19.0
tree-sitter-jinja2==0.1.0a1
typing-extensions==3.7.4.3
urllib3==1.26.5
Werkzeug==1.0.1
zipp==3.4.1
@@ -1 +1 @@
version = '0.19.0'
version = '0.20.0rc1'

@@ -114,15 +114,17 @@ class BigQueryCredentials(Credentials):
        return ('method', 'database', 'schema', 'location', 'priority',
                'timeout_seconds', 'maximum_bytes_billed')

    def __post_init__(self):
    @classmethod
    def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]:
        # We need to inject the correct value of the database (aka project) at
        # this stage, ref
        # https://github.com/fishtown-analytics/dbt/pull/2908#discussion_r532927436.

        # `database` is an alias of `project` in BigQuery
        if self.database is None:
        if 'database' not in d:
            _, database = get_bigquery_defaults()
            self.database = database
            d['database'] = database
        return d


class BigQueryConnectionManager(BaseConnectionManager):
@@ -307,7 +309,7 @@ class BigQueryConnectionManager(BaseConnectionManager):

        logger.debug('On {}: {}', conn.name, sql)

        if self.profile.query_comment.job_label:
        if self.profile.query_comment and self.profile.query_comment.job_label:
            query_comment = self.query_header.comment.query_comment
            labels = self._labels_from_query_comment(query_comment)
        else:

@@ -20,7 +20,7 @@ except ImportError:


package_name = "dbt-bigquery"
package_version = "0.19.0"
package_version = "0.20.0rc1"
description = """The bigquery adapter plugin for dbt (data build tool)"""

this_directory = os.path.abspath(os.path.dirname(__file__))

@@ -1 +1 @@
version = '0.19.0'
version = '0.20.0rc1'

@@ -1,21 +1,62 @@
from datetime import datetime
from dataclasses import dataclass
from typing import Optional, Set
from typing import Optional, Set, List, Any
from dbt.adapters.base.meta import available
from dbt.adapters.base.impl import AdapterConfig
from dbt.adapters.sql import SQLAdapter
from dbt.adapters.postgres import PostgresConnectionManager
from dbt.adapters.postgres import PostgresColumn
from dbt.adapters.postgres import PostgresRelation
from dbt.dataclass_schema import dbtClassMixin, ValidationError
import dbt.exceptions
import dbt.utils


# note that this isn't an adapter macro, so just a single underscore
GET_RELATIONS_MACRO_NAME = 'postgres_get_relations'


@dataclass
class PostgresIndexConfig(dbtClassMixin):
    columns: List[str]
    unique: bool = False
    type: Optional[str] = None

    def render(self, relation):
        # We append the current timestamp to the index name because otherwise
        # the index will only be created on every other run. See
        # https://github.com/fishtown-analytics/dbt/issues/1945#issuecomment-576714925
        # for an explanation.
        now = datetime.utcnow().isoformat()
        inputs = (self.columns +
                  [relation.render(), str(self.unique), str(self.type), now])
        string = '_'.join(inputs)
        return dbt.utils.md5(string)

    @classmethod
    def parse(cls, raw_index) -> Optional['PostgresIndexConfig']:
        if raw_index is None:
            return None
        try:
            cls.validate(raw_index)
            return cls.from_dict(raw_index)
        except ValidationError as exc:
            msg = dbt.exceptions.validator_error_message(exc)
            dbt.exceptions.raise_compiler_error(
                f'Could not parse index config: {msg}'
            )
        except TypeError:
            dbt.exceptions.raise_compiler_error(
                f'Invalid index config:\n'
                f'  Got: {raw_index}\n'
                f'  Expected a dictionary with at minimum a "columns" key'
            )


@dataclass
class PostgresConfig(AdapterConfig):
    unlogged: Optional[bool] = None
    indexes: Optional[List[PostgresIndexConfig]] = None


class PostgresAdapter(SQLAdapter):
@@ -42,6 +83,10 @@ class PostgresAdapter(SQLAdapter):
        # return an empty string on success so macros can call this
        return ''

    @available
    def parse_index(self, raw_index: Any) -> Optional[PostgresIndexConfig]:
        return PostgresIndexConfig.parse(raw_index)

    def _link_cached_database_relations(self, schemas: Set[str]):
        """
        :param schemas: The set of schemas that should have links added.

@@ -14,6 +14,21 @@
  );
{%- endmacro %}

{% macro postgres__get_create_index_sql(relation, index_dict) -%}
  {%- set index_config = adapter.parse_index(index_dict) -%}
  {%- set comma_separated_columns = ", ".join(index_config.columns) -%}
  {%- set index_name = index_config.render(relation) -%}

  create {% if index_config.unique -%}
    unique
  {%- endif %} index if not exists
  "{{ index_name }}"
  on {{ relation }} {% if index_config.type -%}
    using {{ index_config.type }}
  {%- endif %}
  ({{ comma_separated_columns }});
{%- endmacro %}

{% macro postgres__create_schema(relation) -%}
  {% if relation.database -%}
    {{ adapter.verify_database(relation.database) }}

@@ -41,7 +41,7 @@ def _dbt_psycopg2_name():


package_name = "dbt-postgres"
package_version = "0.19.0"
package_version = "0.20.0rc1"
description = """The postgres adapter plugin for dbt (data build tool)"""

this_directory = os.path.abspath(os.path.dirname(__file__))

@@ -1 +1 @@
version = '0.19.0'
version = '0.20.0rc1'

@@ -20,7 +20,7 @@ except ImportError:


package_name = "dbt-redshift"
package_version = "0.19.0"
package_version = "0.20.0rc1"
description = """The redshift adapter plugin for dbt (data build tool)"""

this_directory = os.path.abspath(os.path.dirname(__file__))

@@ -1 +1 @@
version = '0.19.0'
version = '0.20.0rc1'

@@ -5,6 +5,7 @@ import re
from contextlib import contextmanager
from dataclasses import dataclass
from io import StringIO
from time import sleep
from typing import Optional

from cryptography.hazmat.backends import default_backend
@@ -106,6 +107,7 @@ class SnowflakeCredentials(Credentials):
                'need a client ID, a client secret, and a refresh token to get '
                'an access token'
            )

        # should the full url be a config item?
        token_url = _TOKEN_REQUEST_URL.format(self.account)
        # I think this is only used to redirect on success, which we ignore
@@ -125,10 +127,25 @@ class SnowflakeCredentials(Credentials):
            'Authorization': f'Basic {auth}',
            'Content-type': 'application/x-www-form-urlencoded;charset=utf-8'
        }
        result = requests.post(token_url, headers=headers, data=data)
        result_json = result.json()
        if 'access_token' not in result_json:
            raise DatabaseException(f'Did not get a token: {result_json}')

        result_json = None
        max_iter = 20
        # Attempt to obtain JSON for 1 second before throwing an error
        for i in range(max_iter):
            result = requests.post(token_url, headers=headers, data=data)
            try:
                result_json = result.json()
                break
            except ValueError as e:
                message = result.text
                logger.debug(f"Got a non-json response ({result.status_code}): \
                             {e}, message: {message}")
                sleep(0.05)

        if result_json is None:
            raise DatabaseException(f"""Did not receive valid json with access_token.
                                    Showing json response: {result_json}""")

        return result_json['access_token']

    def _get_private_key(self):

@@ -155,9 +155,8 @@


{% macro snowflake__alter_column_comment(relation, column_dict) -%}
  alter {{ relation.type }} {{ relation }} alter
  {% for column_name in column_dict %}
    {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} COMMENT $${{ column_dict[column_name]['description'] | replace('$', '[$]') }}$$ {{ ',' if not loop.last else ';' }}
    comment if exists on column {{ relation }}.{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} is $${{ column_dict[column_name]['description'] | replace('$', '[$]') }}$$;
  {% endfor %}
{% endmacro %}

Some files were not shown because too many files have changed in this diff.