Mirror of https://github.com/dbt-labs/dbt-core (synced 2025-12-19 05:21:27 +00:00)

Compare commits: string_sel...fix_test_c (387 commits)
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.19.0b1
current_version = 0.19.0
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)
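For context, the `parse` setting above (only the first lines of the pattern are visible in this hunk) splits a version string into named groups. A minimal sketch of the same idea in Python; the module-level name here is illustrative and not part of the repository:

```python
import re

# Same named groups as the portion of the bumpversion `parse` pattern shown above.
VERSION_PATTERN = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)")

match = VERSION_PATTERN.match("0.19.0")
if match:
    parts = {name: int(value) for name, value in match.groupdict().items()}
    print(parts)  # {'major': 0, 'minor': 19, 'patch': 0}
```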
@@ -2,12 +2,19 @@ version: 2.1
jobs:
unit:
docker: &test_only
- image: fishtownanalytics/test-container:9
- image: fishtownanalytics/test-container:11
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
TOX_PARALLEL_NO_SPINNER: 1
steps:
- checkout
- run: tox -e flake8,mypy,unit-py36,unit-py38
- run: tox -p -e py36,py37,py38
lint:
docker: *test_only
steps:
- checkout
- run: tox -e mypy,flake8 -- -v
build-wheels:
docker: *test_only
steps:
@@ -19,7 +26,7 @@ jobs:
export PYTHON_BIN="${PYTHON_ENV}/bin/python"
$PYTHON_BIN -m pip install -U pip setuptools
$PYTHON_BIN -m pip install -r requirements.txt
$PYTHON_BIN -m pip install -r dev_requirements.txt
$PYTHON_BIN -m pip install -r dev-requirements.txt
/bin/bash ./scripts/build-wheels.sh
$PYTHON_BIN ./scripts/collect-dbt-contexts.py > ./dist/context_metadata.json
$PYTHON_BIN ./scripts/collect-artifact-schema.py > ./dist/artifact_schemas.json
@@ -28,20 +35,22 @@ jobs:
- store_artifacts:
path: ./dist
destination: dist
integration-postgres-py36:
docker: &test_and_postgres
- image: fishtownanalytics/test-container:9
integration-postgres:
docker:
- image: fishtownanalytics/test-container:11
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
TOX_PARALLEL_NO_SPINNER: 1
- image: postgres
name: database
environment: &pgenv
environment:
POSTGRES_USER: "root"
POSTGRES_PASSWORD: "password"
POSTGRES_DB: "dbt"
steps:
- checkout
- run: &setupdb
- run:
name: Setup postgres
command: bash test/setup_db.sh
environment:
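The integration jobs run against the `postgres` service container configured above. As a quick local sanity check that such a database is reachable, something like the following works; the host and port are assumptions for a locally running container and are not part of the diff:

```python
import psycopg2  # assumes psycopg2 (or psycopg2-binary) is installed

# Credentials mirror the POSTGRES_* settings of the CI service above.
conn = psycopg2.connect(
    host="localhost",   # assumption: container published on localhost
    port=5432,          # assumption: default Postgres port
    user="root",
    password="password",
    dbname="dbt",
)
with conn, conn.cursor() as cur:
    cur.execute("SELECT 1")
    print(cur.fetchone())  # (1,)
conn.close()
```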
@@ -50,74 +59,39 @@ jobs:
PGPASSWORD: password
PGDATABASE: postgres
- run:
name: Run tests
command: tox -e integration-postgres-py36
name: Postgres integration tests
command: tox -p -e py36-postgres,py38-postgres -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-snowflake-py36:
integration-snowflake:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py36
no_output_timeout: 1h
name: Snowflake integration tests
command: tox -p -e py36-snowflake,py38-snowflake -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-redshift-py36:
integration-redshift:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py36
name: Redshift integration tests
command: tox -p -e py36-redshift,py38-redshift -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-bigquery-py36:
integration-bigquery:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py36
- store_artifacts:
path: ./logs
integration-postgres-py38:
docker: *test_and_postgres
steps:
- checkout
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py38
- store_artifacts:
path: ./logs
integration-snowflake-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py38
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py38
- store_artifacts:
path: ./logs
integration-bigquery-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py38
name: Bigquery integration test
command: tox -p -e py36-bigquery,py38-bigquery -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
@@ -125,39 +99,25 @@ workflows:
version: 2
test-everything:
jobs:
- lint
- unit
- integration-postgres-py36:
- integration-postgres:
requires:
- unit
- integration-redshift-py36:
requires:
- integration-postgres-py36
- integration-bigquery-py36:
requires:
- integration-postgres-py36
- integration-snowflake-py36:
requires:
- integration-postgres-py36
- integration-postgres-py38:
- integration-redshift:
requires:
- unit
- integration-redshift-py38:
- integration-bigquery:
requires:
- integration-postgres-py38
- integration-bigquery-py38:
- unit
- integration-snowflake:
requires:
- integration-postgres-py38
- integration-snowflake-py38:
requires:
- integration-postgres-py38
- unit
- build-wheels:
requires:
- lint
- unit
- integration-postgres-py36
- integration-redshift-py36
- integration-bigquery-py36
- integration-snowflake-py36
- integration-postgres-py38
- integration-redshift-py38
- integration-bigquery-py38
- integration-snowflake-py38
- integration-postgres
- integration-redshift
- integration-bigquery
- integration-snowflake
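The updated workflow wires the consolidated integration jobs together through `requires` dependencies. Purely as an illustration of how such a dependency graph resolves into an execution order (this is not CircleCI code; only the job names come from the config above), a standard-library sketch:

```python
from graphlib import TopologicalSorter  # Python 3.9+

# Edges mirror the `requires` entries in the new workflow above.
graph = {
    "integration-postgres": {"unit"},
    "integration-redshift": {"unit"},
    "integration-bigquery": {"unit"},
    "integration-snowflake": {"unit"},
    "build-wheels": {
        "lint",
        "unit",
        "integration-postgres",
        "integration-redshift",
        "integration-bigquery",
        "integration-snowflake",
    },
}

# One valid serial order; CircleCI actually runs independent jobs in parallel.
print(list(TopologicalSorter(graph).static_order()))
```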
.github/dependabot.yml (new file, vendored, 45 lines added)
@@ -0,0 +1,45 @@
version: 2
updates:
# python dependencies
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
rebase-strategy: "disabled"
- package-ecosystem: "pip"
directory: "/core"
schedule:
interval: "daily"
rebase-strategy: "disabled"
- package-ecosystem: "pip"
directory: "/plugins/bigquery"
schedule:
interval: "daily"
rebase-strategy: "disabled"
- package-ecosystem: "pip"
directory: "/plugins/postgres"
schedule:
interval: "daily"
rebase-strategy: "disabled"
- package-ecosystem: "pip"
directory: "/plugins/redshift"
schedule:
interval: "daily"
rebase-strategy: "disabled"
- package-ecosystem: "pip"
directory: "/plugins/snowflake"
schedule:
interval: "daily"
rebase-strategy: "disabled"

# docker dependencies
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "weekly"
rebase-strategy: "disabled"
- package-ecosystem: "docker"
directory: "/docker"
schedule:
interval: "weekly"
rebase-strategy: "disabled"
.gitignore (vendored, 3 lines changed)
@@ -8,7 +8,7 @@ __pycache__/

# Distribution / packaging
.Python
env/
env*/
dbt_env/
build/
develop-eggs/
@@ -85,6 +85,7 @@ target/

# pycharm
.idea/
venv/

# AWS credentials
.aws/
ARCHITECTURE.md (new file, 49 lines added)
@@ -0,0 +1,49 @@
The core function of dbt is SQL compilation and execution. Users create projects of dbt resources (models, tests, seeds, snapshots, ...), defined in SQL and YAML files, and they invoke dbt to create, update, or query associated views and tables. Today, dbt makes heavy use of Jinja2 to enable the templating of SQL, and to construct a DAG (Directed Acyclic Graph) from all of the resources in a project. Users can also extend their projects by installing resources (including Jinja macros) from other projects, called "packages."

## dbt-core

Most of the python code in the repository is within the `core/dbt` directory. Currently the main subdirectories are:
- [`adapters`](core/dbt/adapters): Define base classes for behavior that is likely to differ across databases
- [`clients`](core/dbt/clients): Interface with dependencies (agate, jinja) or across operating systems
- [`config`](core/dbt/config): Reconcile user-supplied configuration from connection profiles, project files, and Jinja macros
- [`context`](core/dbt/context): Build and expose dbt-specific Jinja functionality
- [`contracts`](core/dbt/contracts): Define Python objects (dataclasses) that dbt expects to create and validate
- [`deps`](core/dbt/deps): Package installation and dependency resolution
- [`graph`](core/dbt/graph): Produce a `networkx` DAG of project resources, and select those resources given user-supplied criteria
- [`include`](core/dbt/include): The dbt "global project," which defines default implementations of Jinja2 macros
- [`parser`](core/dbt/parser): Read project files, validate, construct python objects
- [`rpc`](core/dbt/rpc): Provide remote procedure call server for invoking dbt, following JSON-RPC 2.0 spec
- [`task`](core/dbt/task): Set forth the actions that dbt can perform when invoked

### Invoking dbt

There are two supported ways of invoking dbt: from the command line and using an RPC server.

The "tasks" map to top-level dbt commands: `dbt run` => task.run.RunTask, and so on. Some are more like abstract base classes (GraphRunnableTask, for example), but all the concrete types outside of task/rpc should map to tasks. Currently only one task executes at a time. Each task kicks off its "Runners", and those do execute in parallel; the parallelism is managed via a thread pool, in GraphRunnableTask.

### core/dbt/include/index.html

This is the docs website code. It comes from the dbt-docs repository, and is generated when a release is packaged.

## Adapters

dbt uses an adapter-plugin pattern to extend support to different databases, warehouses, query engines, etc. The four core adapters that are in the main repository, contained within the [`plugins`](plugins) subdirectory, are: Postgres, Redshift, Snowflake, and BigQuery. Other warehouses use adapter plugins defined in separate repositories (e.g. [dbt-spark](https://github.com/fishtown-analytics/dbt-spark), [dbt-presto](https://github.com/fishtown-analytics/dbt-presto)).

Each adapter is a mix of python, Jinja2, and SQL. The adapter code also makes heavy use of Jinja2 to wrap modular chunks of SQL functionality, define default implementations, and allow plugins to override them.

Each adapter plugin is a standalone python package that includes:

- `dbt/include/[name]`: A "sub-global" dbt project, of YAML and SQL files, that reimplements Jinja macros to use the adapter's supported SQL syntax
- `dbt/adapters/[name]`: Python modules that inherit, and optionally reimplement, the base adapter classes defined in dbt-core
- `setup.py`

The Postgres adapter code is the most central, and many of its implementations are used as the defaults defined in the dbt-core global project. The greater the distance of a data technology from Postgres, the more its adapter plugin may need to reimplement.

## Testing dbt

The [`test/`](test/) subdirectory includes unit and integration tests that run as continuous integration checks against open pull requests. Unit tests check mock inputs and outputs of specific python functions. Integration tests perform end-to-end dbt invocations against real adapters (Postgres, Redshift, Snowflake, BigQuery) and assert that the results match expectations. See [the contributing guide](CONTRIBUTING.md) for a step-by-step walkthrough of setting up a local development and testing environment.

## Everything else

- [docker](docker/): All dbt versions are published as Docker images on DockerHub. This subfolder contains the `Dockerfile` (constant) and `requirements.txt` (one for each version).
- [etc](etc/): Images for README
- [scripts](scripts/): Helper scripts for testing, releasing, and producing JSON schemas. These are not included in distributions of dbt, nor are they rigorously tested—they're just handy tools for the dbt maintainers :)
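The "Invoking dbt" section above describes tasks kicking off runners that execute in parallel via a thread pool. A rough sketch of that pattern, ignoring DAG ordering for brevity; the class and method names here are illustrative and do not come from the dbt codebase:

```python
from concurrent.futures import ThreadPoolExecutor


class ToyGraphRunnableTask:
    """Illustrative stand-in for a task that runs one runner per graph node."""

    def __init__(self, nodes, threads=4):
        self.nodes = nodes
        self.threads = threads

    def run_node(self, node):
        # A real runner would compile and execute SQL for this node.
        return f"ran {node}"

    def execute_nodes(self):
        # Parallelism is bounded by a thread pool, one submission per node.
        with ThreadPoolExecutor(max_workers=self.threads) as pool:
            return list(pool.map(self.run_node, self.nodes))


if __name__ == "__main__":
    task = ToyGraphRunnableTask(nodes=["model_a", "model_b", "model_c"], threads=2)
    print(task.execute_nodes())
```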
CHANGELOG.md (190 lines changed)
@@ -1,51 +1,194 @@
|
||||
## dbt 0.19.0 (Release TBD)
|
||||
## dbt 0.20.0 (Release TBD)
|
||||
|
||||
### Fixes
|
||||
- Fix exit code from dbt debug not returning a failure when one of the tests fail ([#3017](https://github.com/fishtown-analytics/dbt/issues/3017))
|
||||
- Auto-generated CTEs in tests and ephemeral models have lowercase names to comply with dbt coding conventions ([#3027](https://github.com/fishtown-analytics/dbt/issues/3027), [#3028](https://github.com/fishtown-analytics/dbt/issues/3028))
|
||||
- Fix incorrect error message when a selector does not match any node ([#3036](https://github.com/fishtown-analytics/dbt/issues/3036))
|
||||
- Fix variable `_dbt_max_partition` declaration and initialization for BigQuery incremental models ([#2940](https://github.com/fishtown-analytics/dbt/issues/2940), [#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
|
||||
- Moving from 'master' to 'HEAD' default branch in git ([#3057](https://github.com/fishtown-analytics/dbt/issues/3057), [#3104](https://github.com/fishtown-analytics/dbt/issues/3104), [#3117](https://github.com/fishtown-analytics/dbt/issues/3117))
|
||||
- Requirement on `dataclasses` is relaxed to be between `>=0.6,<0.9` allowing dbt to cohabit with other libraries which required higher versions. ([#3150](https://github.com/fishtown-analytics/dbt/issues/3150), [#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
|
||||
- Add feature to add `_n` alias to same column names in SQL query ([#3147](https://github.com/fishtown-analytics/dbt/issues/3147), [#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
|
||||
- Raise a proper error message if dbt parses a macro twice due to macro duplication or misconfiguration. ([#2449](https://github.com/fishtown-analytics/dbt/issues/2449), [#3165](https://github.com/fishtown-analytics/dbt/pull/3165))
|
||||
- Fix exposures missing in graph context variable. ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
|
||||
- Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
|
||||
|
||||
### Features
|
||||
- Added macro get_partitions_metadata(table) to return partition metadata for partitioned table [#2596](https://github.com/fishtown-analytics/dbt/pull/2596)
|
||||
- Added native python 're' module for regex in jinja templates [#2851](https://github.com/fishtown-analytics/dbt/pull/2851)
|
||||
- Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
|
||||
- Add optional configs for `require_partition_filter` and `partition_expiration_days` in BigQuery ([#1843](https://github.com/fishtown-analytics/dbt/issues/1843), [#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
|
||||
- Fix for EOL SQL comments prevent entire line execution ([#2731](https://github.com/fishtown-analytics/dbt/issues/2731), [#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
|
||||
- Add optional `merge_update_columns` config to specify columns to update for `merge` statements in BigQuery and Snowflake ([#1862](https://github.com/fishtown-analytics/dbt/issues/1862), [#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
|
||||
- Use query comment JSON as job labels for BigQuery adapter when `query-comment.job-label` is set to `true` ([#2483](https://github.com/fishtown-analytics/dbt/issues/2483)), ([#3145](https://github.com/fishtown-analytics/dbt/pull/3145))
|
||||
- Set application_name for Postgres connections ([#885](https://github.com/fishtown-analytics/dbt/issues/885), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
|
||||
- Support disabling schema tests, and configuring tests from `dbt_project.yml` ([#3252](https://github.com/fishtown-analytics/dbt/issues/3252),
|
||||
[#3253](https://github.com/fishtown-analytics/dbt/issues/3253), [#3257](https://github.com/fishtown-analytics/dbt/pull/3257))
|
||||
- Add Jinja tag for tests ([#1173](https://github.com/fishtown-analytics/dbt/issues/1173), [#3261](https://github.com/fishtown-analytics/dbt/pull/3261))
|
||||
|
||||
### Under the hood
|
||||
- Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. ([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062))
|
||||
- Update script to collect and write json schema for dbt artifacts ([#2870](https://github.com/fishtown-analytics/dbt/issues/2870), [#3065](https://github.com/fishtown-analytics/dbt/pull/3065))
|
||||
- Relax Google Cloud dependency pins to major versions. ([#3156](https://github.com/fishtown-analytics/dbt/pull/3156)
|
||||
- Bump `snowflake-connector-python` and related dependencies, support Python 3.9 ([#2985](https://github.com/fishtown-analytics/dbt/issues/2985), [#3148](https://github.com/fishtown-analytics/dbt/pull/3148))
|
||||
- General development environment clean up and improve experience running tests locally ([#3194](https://github.com/fishtown-analytics/dbt/issues/3194), [#3204](https://github.com/fishtown-analytics/dbt/pull/3204), [#3228](https://github.com/fishtown-analytics/dbt/pull/3228))
|
||||
- Add a new materialization for tests, update data tests to use test materialization when executing. ([#3154](https://github.com/fishtown-analytics/dbt/issues/3154), [#3181](https://github.com/fishtown-analytics/dbt/pull/3181))
|
||||
- Switch from externally storing parsing state in ParseResult object to using Manifest ([#3163](http://github.com/fishtown-analytics/dbt/issues/3163), [#3219](https://github.com/fishtown-analytics/dbt/pull/3219))
|
||||
- Switch from loading project files in separate parsers to loading in one place ([#3244](http://github.com/fishtown-analytics/dbt/issues/3244), [#3248](https://github.com/fishtown-analytics/dbt/pull/3248))
|
||||
|
||||
Contributors:
|
||||
- [@yu-iskw](https://github.com/yu-iskw) ([#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
|
||||
- [@sdebruyn](https://github.com/sdebruyn) / [@lynxcare](https://github.com/lynxcare) ([#3018](https://github.com/fishtown-analytics/dbt/pull/3018))
|
||||
- [@rvacaru](https://github.com/rvacaru) ([#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
|
||||
- [@NiallRees](https://github.com/NiallRees) ([#3028](https://github.com/fishtown-analytics/dbt/pull/3028))
|
||||
- [ran-eh](https://github.com/ran-eh) ([#3036](https://github.com/fishtown-analytics/dbt/pull/3036))
|
||||
- [@pcasteran](https://github.com/pcasteran) ([#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
|
||||
- [@VasiliiSurov](https://github.com/VasiliiSurov) ([#3104](https://github.com/fishtown-analytics/dbt/pull/3104))
|
||||
- [@jmcarp](https://github.com/jmcarp) ([#3145](https://github.com/fishtown-analytics/dbt/pull/3145))
|
||||
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
|
||||
- [@max-sixty](https://github.com/max-sixty) ([#3156](https://github.com/fishtown-analytics/dbt/pull/3156)
|
||||
- [@prratek](https://github.com/prratek) ([#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
|
||||
- [@techytushar](https://github.com/techytushar) ([#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
|
||||
- [@cgopalan](https://github.com/cgopalan) ([#3165](https://github.com/fishtown-analytics/dbt/pull/3165), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
|
||||
- [@fux](https://github.com/fuchsst) ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
|
||||
- [@dmateusp](https://github.com/dmateusp) ([#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
|
||||
|
||||
## dbt 0.19.1 (March 31, 2021)
|
||||
|
||||
## dbt 0.19.1rc2 (March 25, 2021)
|
||||
|
||||
|
||||
### Fixes
|
||||
- Pass service-account scopes to gcloud-based oauth ([#3040](https://github.com/fishtown-analytics/dbt/issues/3040), [#3041](https://github.com/fishtown-analytics/dbt/pull/3041))
|
||||
|
||||
Contributors:
|
||||
- [@yu-iskw](https://github.com/yu-iskw) ([#3041](https://github.com/fishtown-analytics/dbt/pull/3041))
|
||||
|
||||
## dbt 0.19.1rc1 (March 15, 2021)
|
||||
|
||||
### Under the hood
|
||||
- Update code to use Mashumaro 2.0 ([#3138](https://github.com/fishtown-analytics/dbt/pull/3138))
|
||||
- Pin `agate<1.6.2` to avoid installation errors relating to its new dependency `PyICU` ([#3160](https://github.com/fishtown-analytics/dbt/issues/3160), [#3161](https://github.com/fishtown-analytics/dbt/pull/3161))
|
||||
- Add an event to track resource counts ([#3050](https://github.com/fishtown-analytics/dbt/issues/3050), [#3157](https://github.com/fishtown-analytics/dbt/pull/3157))
|
||||
|
||||
### Fixes
|
||||
|
||||
- Fix compiled sql for ephemeral models ([#3139](https://github.com/fishtown-analytics/dbt/pull/3139), [#3056](https://github.com/fishtown-analytics/dbt/pull/3056))
|
||||
|
||||
## dbt 0.19.1b2 (February 15, 2021)
|
||||
|
||||
## dbt 0.19.1b1 (February 12, 2021)
|
||||
|
||||
### Fixes
|
||||
|
||||
- On BigQuery, fix regressions for `insert_overwrite` incremental strategy with `int64` and `timestamp` partition columns ([#3063](https://github.com/fishtown-analytics/dbt/issues/3063), [#3095](https://github.com/fishtown-analytics/dbt/issues/3095), [#3098](https://github.com/fishtown-analytics/dbt/issues/3098))
|
||||
|
||||
### Under the hood
|
||||
- Bump werkzeug upper bound dependency to `<v2.0` ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
|
||||
- Performance fixes for many different things ([#2862](https://github.com/fishtown-analytics/dbt/issues/2862), [#3034](https://github.com/fishtown-analytics/dbt/pull/3034))
|
||||
|
||||
Contributors:
|
||||
- [@Bl3f](https://github.com/Bl3f) ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
|
||||
|
||||
|
||||
## dbt 0.19.0 (January 27, 2021)
|
||||
|
||||
## dbt 0.19.0rc3 (January 27, 2021)
|
||||
|
||||
### Under the hood
|
||||
- Cleanup docker resources, use single `docker/Dockerfile` for publishing dbt as a docker image ([dbt-release#3](https://github.com/fishtown-analytics/dbt-release/issues/3), [#3019](https://github.com/fishtown-analytics/dbt/pull/3019))
|
||||
|
||||
## dbt 0.19.0rc2 (January 14, 2021)
|
||||
|
||||
### Fixes
|
||||
- Fix regression with defining exposures and other resources with the same name ([#2969](https://github.com/fishtown-analytics/dbt/issues/2969), [#3009](https://github.com/fishtown-analytics/dbt/pull/3009))
|
||||
- Remove ellipses printed while parsing ([#2971](https://github.com/fishtown-analytics/dbt/issues/2971), [#2996](https://github.com/fishtown-analytics/dbt/pull/2996))
|
||||
|
||||
### Under the hood
|
||||
- Rewrite macro for snapshot_merge_sql to make compatible with other SQL dialects ([#3003](https://github.com/fishtown-analytics/dbt/pull/3003)
|
||||
- Rewrite logic in `snapshot_check_strategy()` to make compatible with other SQL dialects ([#3000](https://github.com/fishtown-analytics/dbt/pull/3000), [#3001](https://github.com/fishtown-analytics/dbt/pull/3001))
|
||||
- Remove version restrictions on `botocore` ([#3006](https://github.com/fishtown-analytics/dbt/pull/3006))
|
||||
- Include `exposures` in start-of-invocation stdout summary: `Found ...` ([#3007](https://github.com/fishtown-analytics/dbt/pull/3007), [#3008](https://github.com/fishtown-analytics/dbt/pull/3008))
|
||||
|
||||
Contributors:
|
||||
- [@mikaelene](https://github.com/mikaelene) ([#3003](https://github.com/fishtown-analytics/dbt/pull/3003))
|
||||
- [@dbeatty10](https://github.com/dbeatty10) ([dbt-adapter-tests#10](https://github.com/fishtown-analytics/dbt-adapter-tests/pull/10))
|
||||
- [@swanderz](https://github.com/swanderz) ([#3000](https://github.com/fishtown-analytics/dbt/pull/3000))
|
||||
- [@stpierre](https://github.com/stpierre) ([#3006](https://github.com/fishtown-analytics/dbt/pull/3006))
|
||||
|
||||
## dbt 0.19.0rc1 (December 29, 2020)
|
||||
|
||||
### Breaking changes
|
||||
|
||||
- Defer if and only if upstream reference does not exist in current environment namespace ([#2909](https://github.com/fishtown-analytics/dbt/issues/2909), [#2946](https://github.com/fishtown-analytics/dbt/pull/2946))
|
||||
- Rationalize run result status reporting and clean up artifact schema ([#2493](https://github.com/fishtown-analytics/dbt/issues/2493), [#2943](https://github.com/fishtown-analytics/dbt/pull/2943))
|
||||
- Add adapter specific query execution info to run results and source freshness results artifacts. Statement call blocks return `response` instead of `status`, and the adapter method `get_status` is now `get_response` ([#2747](https://github.com/fishtown-analytics/dbt/issues/2747), [#2961](https://github.com/fishtown-analytics/dbt/pull/2961))
|
||||
|
||||
### Features
|
||||
- Added macro `get_partitions_metadata(table)` to return partition metadata for BigQuery partitioned tables ([#2552](https://github.com/fishtown-analytics/dbt/pull/2552), [#2596](https://github.com/fishtown-analytics/dbt/pull/2596))
|
||||
- Added `--defer` flag for `dbt test` as well ([#2701](https://github.com/fishtown-analytics/dbt/issues/2701), [#2954](https://github.com/fishtown-analytics/dbt/pull/2954))
|
||||
- Added native python `re` module for regex in jinja templates ([#1755](https://github.com/fishtown-analytics/dbt/pull/2851), [#1755](https://github.com/fishtown-analytics/dbt/pull/2851))
|
||||
- Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
|
||||
- Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866))
|
||||
- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-anaytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895))
|
||||
- Hourly, monthly and yearly partitions available in BigQuery ([#2476](https://github.com/fishtown-analytics/dbt/issues/2476), [#2903](https://github.com/fishtown-analytics/dbt/pull/2903))
|
||||
- Allow BigQuery to default to the environment's default project ([#2828](https://github.com/fishtown-analytics/dbt/pull/2828), [#2908](https://github.com/fishtown-analytics/dbt/pull/2908))
|
||||
- Rationalize run result status reporting and clean up artifact schema ([#2493](https://github.com/fishtown-analytics/dbt/issues/2493), [#2943](https://github.com/fishtown-analytics/dbt/pull/2943))
|
||||
|
||||
### Fixes
|
||||
- Respect --project-dir in dbt clean command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
|
||||
- Fix Redshift adapter `get_columns_in_relation` macro to push schema filter down to the `svv_external_columns` view ([#2855](https://github.com/fishtown-analytics/dbt/issues/2854))
|
||||
- Add `unixodbc-dev` package to testing docker image ([#2859](https://github.com/fishtown-analytics/dbt/pull/2859))
|
||||
- Respect `--project-dir` in `dbt clean` command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
|
||||
- Fix Redshift adapter `get_columns_in_relation` macro to push schema filter down to the `svv_external_columns` view ([#2854](https://github.com/fishtown-analytics/dbt/issues/2854), [#2854](https://github.com/fishtown-analytics/dbt/issues/2854))
|
||||
- Increased the supported relation name length in postgres from 29 to 51 ([#2850](https://github.com/fishtown-analytics/dbt/pull/2850))
|
||||
- `dbt list` command always return `0` as exit code ([#2886](https://github.com/fishtown-analytics/dbt/issues/2886), [#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
|
||||
- Set default `materialized` for test node configs to `test` ([#2806](https://github.com/fishtown-analytics/dbt/issues/2806), [#2902](https://github.com/fishtown-analytics/dbt/pull/2902))
|
||||
- Allow `docs` blocks in `exposure` descriptions ([#2913](https://github.com/fishtown-analytics/dbt/issues/2913), [#2920](https://github.com/fishtown-analytics/dbt/pull/2920))
|
||||
- Use original file path instead of absolute path as checksum for big seeds ([#2927](https://github.com/fishtown-analytics/dbt/issues/2927), [#2939](https://github.com/fishtown-analytics/dbt/pull/2939))
|
||||
- Fix KeyError if deferring to a manifest with a since-deleted source, ephemeral model, or test ([#2875](https://github.com/fishtown-analytics/dbt/issues/2875), [#2958](https://github.com/fishtown-analytics/dbt/pull/2958))
|
||||
|
||||
### Under the hood
|
||||
- Bump hologram version to 0.0.11. Add scripts/dtr.py ([#2888](https://github.com/fishtown-analytics/dbt/issues/2840),[#2889](https://github.com/fishtown-analytics/dbt/pull/2889))
|
||||
- Add `unixodbc-dev` package to testing docker image ([#2859](https://github.com/fishtown-analytics/dbt/pull/2859))
|
||||
- Add event tracking for project parser/load times ([#2823](https://github.com/fishtown-analytics/dbt/issues/2823),[#2893](https://github.com/fishtown-analytics/dbt/pull/2893))
|
||||
- Bump `cryptography` version to `>= 3.2` and bump snowflake connector to `2.3.6` ([#2896](https://github.com/fishtown-analytics/dbt/issues/2896), [#2922](https://github.com/fishtown-analytics/dbt/issues/2922))
|
||||
- Widen supported Google Cloud libraries dependencies ([#2794](https://github.com/fishtown-analytics/dbt/pull/2794), [#2877](https://github.com/fishtown-analytics/dbt/pull/2877)).
|
||||
- Bump `hologram` version to `0.0.11`. Add `scripts/dtr.py` ([#2888](https://github.com/fishtown-analytics/dbt/issues/2840),[#2889](https://github.com/fishtown-analytics/dbt/pull/2889))
|
||||
- Bump `hologram` version to `0.0.12`. Add testing support for python3.9 ([#2822](https://github.com/fishtown-analytics/dbt/issues/2822),[#2960](https://github.com/fishtown-analytics/dbt/pull/2960))
|
||||
- Bump the version requirements for `boto3` in dbt-redshift to the upper limit `1.16` to match dbt-redshift and the `snowflake-python-connector` as of version `2.3.6`. ([#2931](https://github.com/fishtown-analytics/dbt/issues/2931), ([#2963](https://github.com/fishtown-analytics/dbt/issues/2963))
|
||||
|
||||
### Docs
|
||||
- Fixed issue where data tests with tags were not showing up in graph viz ([docs#147](https://github.com/fishtown-analytics/dbt-docs/issues/147), [docs#157](https://github.com/fishtown-analytics/dbt-docs/pull/157))
|
||||
|
||||
Contributors:
|
||||
- [@feluelle](https://github.com/feluelle) ([#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
|
||||
- [ran-eh](https://github.com/ran-eh) [#2596](https://github.com/fishtown-analytics/dbt/pull/2596)
|
||||
- [@hochoy](https://github.com/hochoy) [#2851](https://github.com/fishtown-analytics/dbt/pull/2851)
|
||||
- [@brangisom](https://github.com/brangisom) [#2855](https://github.com/fishtown-analytics/dbt/pull/2855)
|
||||
- [ran-eh](https://github.com/ran-eh) ([#2596](https://github.com/fishtown-analytics/dbt/pull/2596))
|
||||
- [@hochoy](https://github.com/hochoy) ([#2851](https://github.com/fishtown-analytics/dbt/pull/2851))
|
||||
- [@brangisom](https://github.com/brangisom) ([#2855](https://github.com/fishtown-analytics/dbt/pull/2855))
|
||||
- [@elexisvenator](https://github.com/elexisvenator) ([#2850](https://github.com/fishtown-analytics/dbt/pull/2850))
|
||||
- [@franloza](https://github.com/franloza) ([#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
|
||||
- [@max-sixty](https://github.com/max-sixty) ([#2877](https://github.com/fishtown-analytics/dbt/pull/2877), [#2908](https://github.com/fishtown-analytics/dbt/pull/2908))
|
||||
- [@rsella](https://github.com/rsella) ([#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
|
||||
- [@joellabes](https://github.com/joellabes) ([#2913](https://github.com/fishtown-analytics/dbt/issues/2913))
|
||||
- [@plotneishestvo](https://github.com/plotneishestvo) ([#2896](https://github.com/fishtown-analytics/dbt/issues/2896))
|
||||
- [@db-magnus](https://github.com/db-magnus) ([#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
|
||||
- [@tyang209](https:/github.com/tyang209) ([#2931](https://github.com/fishtown-analytics/dbt/issues/2931))
|
||||
|
||||
## dbt 0.19.0b1 (October 21, 2020)
|
||||
|
||||
### Breaking changes
|
||||
- The format for sources.json, run-results.json, manifest.json, and catalog.json has changed to include a common metadata field ([#2761](https://github.com/fishtown-analytics/dbt/issues/2761), [#2778](https://github.com/fishtown-analytics/dbt/pull/2778), [#2763](https://github.com/fishtown-analytics/dbt/issues/2763), [#2784](https://github.com/fishtown-analytics/dbt/pull/2784), [#2764](https://github.com/fishtown-analytics/dbt/issues/2764), [#2785](https://github.com/fishtown-analytics/dbt/pull/2785))
|
||||
- The format for `sources.json`, `run-results.json`, `manifest.json`, and `catalog.json` has changed:
|
||||
- Each now has a common metadata dictionary ([#2761](https://github.com/fishtown-analytics/dbt/issues/2761), [#2778](https://github.com/fishtown-analytics/dbt/pull/2778)). The contents include: schema and dbt versions ([#2670](https://github.com/fishtown-analytics/dbt/issues/2670), [#2767](https://github.com/fishtown-analytics/dbt/pull/2767)); `invocation_id` ([#2763](https://github.com/fishtown-analytics/dbt/issues/2763), [#2784](https://github.com/fishtown-analytics/dbt/pull/2784)); custom environment variables prefixed with `DBT_ENV_CUSTOM_ENV_` ([#2764](https://github.com/fishtown-analytics/dbt/issues/2764), [#2785](https://github.com/fishtown-analytics/dbt/pull/2785)); cli and rpc arguments in the `run_results.json` ([#2510](https://github.com/fishtown-analytics/dbt/issues/2510), [#2813](https://github.com/fishtown-analytics/dbt/pull/2813)).
|
||||
- Remove `injected_sql` from manifest nodes, use `compiled_sql` instead ([#2762](https://github.com/fishtown-analytics/dbt/issues/2762), [#2834](https://github.com/fishtown-analytics/dbt/pull/2834))
|
||||
|
||||
### Features
|
||||
- dbt will compare configurations using the un-rendered form of the config block in dbt_project.yml ([#2713](https://github.com/fishtown-analytics/dbt/issues/2713), [#2735](https://github.com/fishtown-analytics/dbt/pull/2735))
|
||||
- dbt will compare configurations using the un-rendered form of the config block in `dbt_project.yml` ([#2713](https://github.com/fishtown-analytics/dbt/issues/2713), [#2735](https://github.com/fishtown-analytics/dbt/pull/2735))
|
||||
- Added state and defer arguments to the RPC client, matching the CLI ([#2678](https://github.com/fishtown-analytics/dbt/issues/2678), [#2736](https://github.com/fishtown-analytics/dbt/pull/2736))
|
||||
- Added schema and dbt versions to JSON artifacts ([#2670](https://github.com/fishtown-analytics/dbt/issues/2670), [#2767](https://github.com/fishtown-analytics/dbt/pull/2767))
|
||||
- Added ability to snapshot hard-deleted records (opt-in with `invalidate_hard_deletes` config option). ([#249](https://github.com/fishtown-analytics/dbt/issues/249), [#2749](https://github.com/fishtown-analytics/dbt/pull/2749))
|
||||
- Added revival for snapshotting hard-deleted records. ([#2819](https://github.com/fishtown-analytics/dbt/issues/2819), [#2821](https://github.com/fishtown-analytics/dbt/pull/2821))
|
||||
- Improved error messages for YAML selectors ([#2700](https://github.com/fishtown-analytics/dbt/issues/2700), [#2781](https://github.com/fishtown-analytics/dbt/pull/2781))
|
||||
- Save manifest at the same time we save the run_results at the end of a run ([#2765](https://github.com/fishtown-analytics/dbt/issues/2765), [#2799](https://github.com/fishtown-analytics/dbt/pull/2799))
|
||||
- Added dbt_invocation_id for each BigQuery job to enable performance analysis ([#2808](https://github.com/fishtown-analytics/dbt/issues/2808), [#2809](https://github.com/fishtown-analytics/dbt/pull/2809))
|
||||
- Save cli and rpc arguments in run_results.json ([#2510](https://github.com/fishtown-analytics/dbt/issues/2510), [#2813](https://github.com/fishtown-analytics/dbt/pull/2813))
|
||||
- Added `dbt_invocation_id` for each BigQuery job to enable performance analysis ([#2808](https://github.com/fishtown-analytics/dbt/issues/2808), [#2809](https://github.com/fishtown-analytics/dbt/pull/2809))
|
||||
- Added support for BigQuery connections using refresh tokens ([#2344](https://github.com/fishtown-analytics/dbt/issues/2344), [#2805](https://github.com/fishtown-analytics/dbt/pull/2805))
|
||||
- Remove injected_sql from manifest nodes ([#2762](https://github.com/fishtown-analytics/dbt/issues/2762), [#2834](https://github.com/fishtown-analytics/dbt/pull/2834))
|
||||
|
||||
### Under the hood
|
||||
- Save `manifest.json` at the same time we save the `run_results.json` at the end of a run ([#2765](https://github.com/fishtown-analytics/dbt/issues/2765), [#2799](https://github.com/fishtown-analytics/dbt/pull/2799))
|
||||
- Added strategy-specific validation to improve the relevancy of compilation errors for the `timestamp` and `check` snapshot strategies. (([#2787](https://github.com/fishtown-analytics/dbt/issues/2787), [#2791](https://github.com/fishtown-analytics/dbt/pull/2791))
|
||||
- Changed rpc test timeouts to avoid locally run test failures ([#2803](https://github.com/fishtown-analytics/dbt/issues/2803),[#2804](https://github.com/fishtown-analytics/dbt/pull/2804))
|
||||
- Added a debug_query on the base adapter that will allow plugin authors to create custom debug queries ([#2751](https://github.com/fishtown-analytics/dbt/issues/2751),[#2871](https://github.com/fishtown-analytics/dbt/pull/2817))
|
||||
- Added a `debug_query` on the base adapter that will allow plugin authors to create custom debug queries ([#2751](https://github.com/fishtown-analytics/dbt/issues/2751),[#2871](https://github.com/fishtown-analytics/dbt/pull/2817))
|
||||
|
||||
### Docs
|
||||
- Add select/deselect option in DAG view dropups. ([docs#98](https://github.com/fishtown-analytics/dbt-docs/issues/98), [docs#138](https://github.com/fishtown-analytics/dbt-docs/pull/138))
|
||||
@@ -59,6 +202,15 @@ Contributors:
|
||||
- [@Mr-Nobody99](https://github.com/Mr-Nobody99) ([docs#138](https://github.com/fishtown-analytics/dbt-docs/pull/138))
|
||||
- [@jplynch77](https://github.com/jplynch77) ([docs#139](https://github.com/fishtown-analytics/dbt-docs/pull/139))
|
||||
|
||||
## dbt 0.18.2 (March 22, 2021)
|
||||
|
||||
## dbt 0.18.2rc1 (March 12, 2021)
|
||||
|
||||
### Under the hood
|
||||
- Pin `agate<1.6.2` to avoid installation errors relating to its new dependency
|
||||
`PyICU` ([#3160](https://github.com/fishtown-analytics/dbt/issues/3160),
|
||||
[#3161](https://github.com/fishtown-analytics/dbt/pull/3161))
|
||||
|
||||
## dbt 0.18.1 (October 13, 2020)
|
||||
|
||||
## dbt 0.18.1rc1 (October 01, 2020)
|
||||
@@ -160,7 +312,6 @@ Contributors:
|
||||
- Add relevance criteria to site search ([docs#113](https://github.com/fishtown-analytics/dbt-docs/pull/113))
|
||||
- Support new selector methods, intersection, and arbitrary parent/child depth in DAG selection syntax ([docs#118](https://github.com/fishtown-analytics/dbt-docs/pull/118))
|
||||
- Revise anonymous event tracking: simpler URL fuzzing; differentiate between Cloud-hosted and non-Cloud docs ([docs#121](https://github.com/fishtown-analytics/dbt-docs/pull/121))
|
||||
|
||||
Contributors:
|
||||
- [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677))
|
||||
- [@kconvey](https://github.com/kconvey) ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), [#2709](https://github.com/fishtown-analytics/dbt/pull/2709)), [#2711](https://github.com/fishtown-analytics/dbt/pull/2711))
|
||||
@@ -880,7 +1031,6 @@ Thanks for your contributions to dbt!
|
||||
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689))
|
||||
|
||||
|
||||
|
||||
## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)
|
||||
|
||||
### Overview
|
||||
|
||||
CONTRIBUTING.md (207 lines changed)
@@ -1,79 +1,86 @@
|
||||
# Contributing to dbt
|
||||
# Contributing to `dbt`
|
||||
|
||||
1. [About this document](#about-this-document)
|
||||
2. [Proposing a change](#proposing-a-change)
|
||||
3. [Getting the code](#getting-the-code)
|
||||
4. [Setting up an environment](#setting-up-an-environment)
|
||||
5. [Running dbt in development](#running-dbt-in-development)
|
||||
5. [Running `dbt` in development](#running-dbt-in-development)
|
||||
6. [Testing](#testing)
|
||||
7. [Submitting a Pull Request](#submitting-a-pull-request)
|
||||
|
||||
## About this document
|
||||
|
||||
This document is a guide intended for folks interested in contributing to dbt. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using dbt, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
|
||||
This document is a guide intended for folks interested in contributing to `dbt`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
|
||||
|
||||
If you're new to python development or contributing to open-source software, we encourage you to read this document from start to finish. If you get stuck, drop us a line in the #development channel on [slack](community.getdbt.com).
|
||||
If you're new to python development or contributing to open-source software, we encourage you to read this document from start to finish. If you get stuck, drop us a line in the `#dbt-core-development` channel on [slack](https://community.getdbt.com).
|
||||
|
||||
### Signing the CLA
|
||||
|
||||
Please note that all contributors to dbt must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the dbt codebase. If you are unable to sign the CLA, then the dbt maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
|
||||
Please note that all contributors to `dbt` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt` codebase. If you are unable to sign the CLA, then the `dbt` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
|
||||
|
||||
## Proposing a change
|
||||
|
||||
dbt is Apache 2.0-licensed open source software. dbt is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
`dbt` is Apache 2.0-licensed open source software. `dbt` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
|
||||
|
||||
### Defining the problem
|
||||
|
||||
If you have an idea for a new feature or if you've discovered a bug in dbt, the first step is to open an issue. Please check the list of [open issues](https://github.com/fishtown-analytics/dbt/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The dbt maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
|
||||
If you have an idea for a new feature or if you've discovered a bug in `dbt`, the first step is to open an issue. Please check the list of [open issues](https://github.com/fishtown-analytics/dbt/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
|
||||
|
||||
**Note:** All community-contributed Pull Requests _must_ be associated with an open issue. If you submit a Pull Request that does not pertain to an open issue, you will be asked to create an issue describing the problem before the Pull Request can be reviewed.
|
||||
> **Note:** All community-contributed Pull Requests _must_ be associated with an open issue. If you submit a Pull Request that does not pertain to an open issue, you will be asked to create an issue describing the problem before the Pull Request can be reviewed.
|
||||
|
||||
### Discussing the idea
|
||||
|
||||
After you open an issue, a dbt maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The dbt maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
|
||||
After you open an issue, a `dbt` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
|
||||
|
||||
### Submitting a change

If an issue is appropriately well scoped and describes a beneficial change to the `dbt` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.

The `dbt` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/fishtown-analytics/dbt/contribute) page.

Here's a good workflow:

- Comment on the open issue, expressing your interest in contributing the required code change
- Outline your planned implementation. If you want help getting started, ask!
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended and avoids any bugs previously encountered. Currently, the best resource for understanding `dbt`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.

In some cases, the right resolution to an open issue might be tangential to the `dbt` codebase. The right path forward might be a documentation update or a change that can be made in user space. In other cases, the issue might describe functionality that the `dbt` maintainers are unwilling or unable to incorporate into the `dbt` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt` repository, the issue will be tagged with the `wontfix` label (see below) and closed.

### Using issue labels

The `dbt` maintainers use labels to categorize open issues. Some labels indicate the databases impacted by the issue, while others describe the domain in the `dbt` codebase germane to the discussion. While most of these labels are self-explanatory (e.g. `snowflake` or `bigquery`), there are others that are worth describing.

| tag | description |
| --- | ----------- |
| [triage](https://github.com/fishtown-analytics/dbt/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/fishtown-analytics/dbt/labels/bug) | This issue represents a defect or regression in `dbt` |
| [enhancement](https://github.com/fishtown-analytics/dbt/labels/enhancement) | This issue represents net-new functionality in `dbt` |
| [good first issue](https://github.com/fishtown-analytics/dbt/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt` codebase to implement. This issue is appropriate for a first-time contributor. |
| [help wanted](https://github.com/fishtown-analytics/dbt/labels/help%20wanted) / [discussion](https://github.com/fishtown-analytics/dbt/labels/discussion) | Conversation around this issue is ongoing, and there isn't yet a clear path forward. Input from community members is most welcome. |
| [duplicate](https://github.com/fishtown-analytics/dbt/issues/duplicate) | This issue is functionally identical to another open issue. The `dbt` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/fishtown-analytics/dbt/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/fishtown-analytics/dbt/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/fishtown-analytics/dbt/labels/wontfix) | This issue does not require a code change in the `dbt` repository, or the maintainers are unwilling or unable to merge a Pull Request which implements the behavior described in the issue. |

#### Branching Strategy

`dbt` has three types of branches:

- **Trunks** are where active development of the next release takes place. There is one trunk, named `develop` at the time of writing, and it is the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk branch or a specific release branch.

## Getting the code

### Installing git

You will need `git` in order to download and modify the `dbt` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).

### External contributors

If you are not a member of the `fishtown-analytics` GitHub organization, you can contribute to `dbt` by forking the `dbt` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:

1. fork the `dbt` repository
2. clone your fork locally
3. check out a new branch for your proposed changes
4. push changes to your fork (see the command-line sketch below)

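A minimal command-line sketch of that flow (the fork itself is created in the GitHub UI, and your username and the branch name below are placeholders):

```sh
# clone your fork, not the upstream repository
git clone git@github.com:<your-username>/dbt.git
cd dbt

# work on a dedicated branch
git checkout -b my-feature-branch
# ...make and commit your changes...

# push the branch to your fork, then open a Pull Request from it
git push -u origin my-feature-branch
```
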
### Core contributors

If you are a member of the `fishtown-analytics` GitHub organization, you will have push access to the `dbt` repo. Rather than forking `dbt` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.

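In other words, something like the following (the branch name is just an example):

```sh
git clone git@github.com:fishtown-analytics/dbt.git
cd dbt
git checkout -b feature/my-change
git push -u origin feature/my-change
```
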
## Setting up an environment

There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt` development, many of these tools are used commonly across open-source Python projects.

### Tools

A short list of tools used in `dbt` testing that will be helpful to your understanding:

- [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage dependencies
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.6, Python 3.7, Python 3.8, and Python 3.9
- [`pytest`](https://docs.pytest.org/en/latest/) to discover/run tests
- [`make`](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [`flake8`](https://flake8.pycqa.org/en/latest/) for code linting
- [`mypy`](https://mypy.readthedocs.io/en/stable/) for static type checking
- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) for continuous integration

A deep understanding of these tools is not required to effectively contribute to `dbt`, but we recommend checking out the attached documentation if you're interested in learning more about them.

#### virtual environments

We strongly recommend using virtual environments when developing code in `dbt`. We recommend creating this virtualenv in the root of the `dbt` repository. To create a new virtualenv, run:

```sh
python3 -m venv env
source env/bin/activate
```

This will create and activate a new Python virtual environment.

#### docker and docker-compose

Docker and docker-compose are both used in testing. Specific instructions for your OS can be found [here](https://docs.docker.com/get-docker/).

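Once both are installed, a quick check (not part of the official setup) confirms they are available on your `PATH`:

```sh
docker --version
docker-compose --version
```
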
#### postgres (optional)

For testing, and later in the examples in this document, you may want to have `psql` available so you can poke around in the database and see what happened. We recommend that you use [homebrew](https://brew.sh/) for that on macOS, and your package manager on Linux. You can install any version of the postgres client that you'd like. On macOS, with homebrew set up, you can run:

```sh
brew install postgresql
```

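For example, once the test database container from the Testing section below is running, you could connect to it with the same credentials used by `test/setup_db.sh` (assuming the container exposes the default port 5432 on localhost):

```sh
PGPASSWORD=password psql -h localhost -U root -d postgres
```
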
## Running `dbt` in development

### Installation

First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Next, install `dbt` (and its dependencies) with:

```sh
make dev
# or
pip install -r dev-requirements.txt -r editable-requirements.txt
```

When `dbt` is installed this way, any changes you make to the `dbt` source code will be reflected immediately in your next `dbt` run.

### Running `dbt`

With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.

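For example (assuming you created the virtualenv in `env/` at the root of the repository, as above; the exact path will differ on your machine):

```sh
which dbt      # expect a path like <repo-root>/env/bin/dbt
dbt --version  # should report the version of your checked-out source
```
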
## Testing

Getting the `dbt` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt`. The section that follows outlines some helpful tips for setting up the test environment.

Since `dbt` works with a number of different databases, you will need to supply credentials for one or more of these databases in your test environment. Most organizations don't have access to each of a BigQuery, Redshift, Snowflake, and Postgres database, so it's likely that you will be unable to run every integration test locally. Fortunately, Fishtown Analytics provides a CI environment with access to sandboxed Redshift, Snowflake, BigQuery, and Postgres databases. See the section on [_Submitting a Pull Request_](#submitting-a-pull-request) below for more information on this CI setup.

### Initial setup

We recommend starting with `dbt`'s Postgres tests. These tests cover most of the functionality in `dbt`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:

```sh
make setup-db
```

or, alternatively:

```sh
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```

`dbt` uses test credentials specified in a `test.env` file in the root of the repository for non-Postgres databases. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against `dbt`. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials. This step is only required to use non-Postgres databases.

```sh
cp test.env.sample test.env
$EDITOR test.env
```

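As an illustration only (the real variable names come from `test.env.sample`, and every value below is a placeholder), a `test.env` for running Snowflake or Redshift tests might contain entries like:

```sh
# placeholder values; copy the actual variable names from test.env.sample
SNOWFLAKE_TEST_ACCOUNT=my_account
SNOWFLAKE_TEST_PASSWORD=********
REDSHIFT_TEST_DBNAME=my_test_db
REDSHIFT_TEST_PASS=********
```
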
> In general, it's most important to have successful unit and Postgres tests. Once you open a PR, `dbt` will automatically run integration tests for the other three core database adapters. Of course, if you are a BigQuery user, contributing a BigQuery-only feature, it's important to run BigQuery tests as well.

### Test commands

There are a few methods for running tests locally.

#### Makefile

There are multiple targets in the Makefile to run common test suites and code checks, most notably:

```sh
# Runs unit tests with py38 and code checks in parallel.
make test
# Runs postgres integration tests with py38 in "fail fast" mode.
make integration
```

> These make targets assume you have a recent version of [`tox`](https://tox.readthedocs.io/en/latest/) installed locally,
> unless you choose a Docker container to run tests. Run `make help` for more info.

Check out the other targets in the Makefile to see other commonly used test suites.

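The Makefile also accepts a `USE_DOCKER=true` flag (see the Makefile itself) to run a target inside the repository's Docker test container instead of a locally installed `tox`, for example:

```sh
make test USE_DOCKER=true
make integration USE_DOCKER=true
```
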
#### `tox`

[`tox`](https://tox.readthedocs.io/en/latest/) takes care of managing virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel; for example, you can run unit tests for Python 3.6, Python 3.7, Python 3.8, `flake8` checks, and `mypy` checks in parallel with `tox -p`. You can also run unit tests for a specific Python version with `tox -e py36`. The configuration for these tests is located in `tox.ini`.

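A couple of concrete invocations (a sketch; the exact environment names are defined in `tox.ini`):

```sh
# unit tests for a single Python version
tox -e py36
# unit tests plus linting and type checks, in parallel
tox -p -e py38,flake8,mypy
```
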
#### `pytest`

Finally, you can also run a specific test or group of tests using [`pytest`](https://docs.pytest.org/en/latest/) directly. With a virtualenv active and dev dependencies installed you can do things like:

```sh
# run specific postgres integration tests
python -m pytest -m profile_postgres test/integration/001_simple_copy_test
# run all unit tests in a file
python -m pytest test/unit/test_graph.py
# run a specific unit test
python -m pytest test/unit/test_graph.py::GraphTest::test__dependency_list
```

> [Here](https://docs.pytest.org/en/reorganize-docs/new-docs/user/commandlineuseful.html)
> is a list of useful command-line options for `pytest` to use while developing.

## Submitting a Pull Request

Fishtown Analytics provides a sandboxed Redshift, Snowflake, and BigQuery database for use in a CI environment. When pull requests are submitted to the `fishtown-analytics/dbt` repo, GitHub will trigger automated tests in CircleCI and Azure Pipelines.

A `dbt` maintainer will review your PR. They may suggest code revisions for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.

Once all tests are passing and your PR has been approved, a `dbt` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:

@@ -46,9 +46,7 @@ RUN curl -LO https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_V
|
||||
&& tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
|
||||
&& rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
|
||||
|
||||
RUN pip3 install -U "tox==3.14.4" wheel "six>=1.14.0,<1.15.0" "virtualenv==20.0.3" setuptools
|
||||
# tox fails if the 'python' interpreter (python2) doesn't have `tox` installed
|
||||
RUN pip install -U "tox==3.14.4" "six>=1.14.0,<1.15.0" "virtualenv==20.0.3" setuptools
|
||||
RUN pip3 install -U tox wheel six setuptools
|
||||
|
||||
# These args are passed in via docker-compose, which reads them from the .env file.
|
||||
# On Linux, run `make .env` to create the .env file for the current user.
|
||||
Makefile
@@ -1,29 +1,81 @@
|
||||
.PHONY: install test test-unit test-integration
|
||||
.DEFAULT_GOAL:=help
|
||||
|
||||
changed_tests := `git status --porcelain | grep '^\(M\| M\|A\| A\)' | awk '{ print $$2 }' | grep '\/test_[a-zA-Z_\-\.]\+.py'`
|
||||
# Optional flag to run target in a docker container.
|
||||
# (example `make test USE_DOCKER=true`)
|
||||
ifeq ($(USE_DOCKER),true)
|
||||
DOCKER_CMD := docker-compose run --rm test
|
||||
endif
|
||||
|
||||
install:
|
||||
pip install -e .
|
||||
.PHONY: dev
|
||||
dev: ## Installs dbt-* packages in develop mode along with development dependencies.
|
||||
pip install -r dev-requirements.txt -r editable-requirements.txt
|
||||
|
||||
test: .env
|
||||
@echo "Full test run starting..."
|
||||
@time docker-compose run test tox
|
||||
.PHONY: mypy
|
||||
mypy: .env ## Runs mypy for static type checking.
|
||||
$(DOCKER_CMD) tox -e mypy
|
||||
|
||||
test-unit: .env
|
||||
@echo "Unit test run starting..."
|
||||
@time docker-compose run test tox -e unit-py36,flake8
|
||||
.PHONY: flake8
|
||||
flake8: .env ## Runs flake8 to enforce style guide.
|
||||
$(DOCKER_CMD) tox -e flake8
|
||||
|
||||
test-integration: .env
|
||||
@echo "Integration test run starting..."
|
||||
@time docker-compose run test tox -e integration-postgres-py36,integration-redshift-py36,integration-snowflake-py36,integration-bigquery-py36
|
||||
.PHONY: lint
|
||||
lint: .env ## Runs all code checks in parallel.
|
||||
$(DOCKER_CMD) tox -p -e flake8,mypy
|
||||
|
||||
test-quick: .env
|
||||
@echo "Integration test run starting..."
|
||||
@time docker-compose run test tox -e integration-postgres-py36 -- -x
|
||||
.PHONY: unit
|
||||
unit: .env ## Runs unit tests with py38.
|
||||
$(DOCKER_CMD) tox -e py38
|
||||
|
||||
.PHONY: test
|
||||
test: .env ## Runs unit tests with py38 and code checks in parallel.
|
||||
$(DOCKER_CMD) tox -p -e py38,flake8,mypy
|
||||
|
||||
.PHONY: integration
|
||||
integration: .env integration-postgres ## Alias for integration-postgres.
|
||||
|
||||
.PHONY: integration-fail-fast
|
||||
integration-fail-fast: .env integration-postgres-fail-fast ## Alias for integration-postgres-fail-fast.
|
||||
|
||||
.PHONY: integration-postgres
|
||||
integration-postgres: .env ## Runs postgres integration tests with py38.
|
||||
$(DOCKER_CMD) tox -e py38-postgres -- -nauto
|
||||
|
||||
.PHONY: integration-postgres-fail-fast
|
||||
integration-postgres-fail-fast: .env ## Runs postgres integration tests with py38 in "fail fast" mode.
|
||||
$(DOCKER_CMD) tox -e py38-postgres -- -x -nauto
|
||||
|
||||
.PHONY: integration-redshift
|
||||
integration-redshift: .env ## Runs redshift integration tests with py38.
|
||||
$(DOCKER_CMD) tox -e py38-redshift -- -nauto
|
||||
|
||||
.PHONY: integration-redshift-fail-fast
|
||||
integration-redshift-fail-fast: .env ## Runs redshift integration tests with py38 in "fail fast" mode.
|
||||
$(DOCKER_CMD) tox -e py38-redshift -- -x -nauto
|
||||
|
||||
.PHONY: integration-snowflake
|
||||
integration-snowflake: .env ## Runs snowflake integration tests with py38.
|
||||
$(DOCKER_CMD) tox -e py38-snowflake -- -nauto
|
||||
|
||||
.PHONY: integration-snowflake-fail-fast
|
||||
integration-snowflake-fail-fast: .env ## Runs snowflake integration tests with py38 in "fail fast" mode.
|
||||
$(DOCKER_CMD) tox -e py38-snowflake -- -x -nauto
|
||||
|
||||
.PHONY: integration-bigquery
|
||||
integration-bigquery: .env ## Runs bigquery integration tests with py38.
|
||||
$(DOCKER_CMD) tox -e py38-bigquery -- -nauto
|
||||
|
||||
.PHONY: integration-bigquery-fail-fast
|
||||
integration-bigquery-fail-fast: .env ## Runs bigquery integration tests with py38 in "fail fast" mode.
|
||||
$(DOCKER_CMD) tox -e py38-bigquery -- -x -nauto
|
||||
|
||||
.PHONY: setup-db
|
||||
setup-db: ## Setup Postgres database with docker-compose for system testing.
|
||||
docker-compose up -d database
|
||||
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
|
||||
|
||||
# This rule creates a file named .env that is used by docker-compose for passing
|
||||
# the USER_ID and GROUP_ID arguments to the Docker image.
|
||||
.env:
|
||||
.env: ## Setup step for using docker-compose with make targets.
|
||||
@touch .env
|
||||
ifneq ($(OS),Windows_NT)
|
||||
ifneq ($(shell uname -s), Darwin)
|
||||
@@ -31,9 +83,9 @@ ifneq ($(shell uname -s), Darwin)
|
||||
@echo GROUP_ID=$(shell id -g) >> .env
|
||||
endif
|
||||
endif
|
||||
@time docker-compose build
|
||||
|
||||
clean:
|
||||
.PHONY: clean
|
||||
clean: ## Resets development environment.
|
||||
rm -f .coverage
|
||||
rm -rf .eggs/
|
||||
rm -f .env
|
||||
@@ -47,3 +99,14 @@ clean:
|
||||
rm -rf target/
|
||||
find . -type f -name '*.pyc' -delete
|
||||
find . -type d -name '__pycache__' -depth -delete
|
||||
|
||||
.PHONY: help
|
||||
help: ## Show this help message.
|
||||
@echo 'usage: make [target] [USE_DOCKER=true]'
|
||||
@echo
|
||||
@echo 'targets:'
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
||||
@echo
|
||||
@echo 'options:'
|
||||
@echo 'use USE_DOCKER=true to run target in a docker container'
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<p align="center">
|
||||
<img src="/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
|
||||
<img src="https://raw.githubusercontent.com/fishtown-analytics/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
<a href="https://codeclimate.com/github/fishtown-analytics/dbt">
|
||||
@@ -20,7 +20,7 @@
|
||||
|
||||
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.
|
||||
|
||||

|
||||

|
||||
|
||||
dbt can be used to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
|
||||
|
||||
@@ -30,7 +30,7 @@ Analysts using dbt can transform their data by simply writing select statements,
|
||||
|
||||
These select statements, or "models", form a dbt project. Models frequently build on top of one another – dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
|
||||
|
||||

|
||||

|
||||
|
||||
## Getting started
|
||||
|
||||
@@ -51,7 +51,7 @@ These select statements, or "models", form a dbt project. Models frequently buil
|
||||
## Reporting bugs and contributing code
|
||||
|
||||
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt/issues/new).
|
||||
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](/CONTRIBUTING.md)
|
||||
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](https://github.com/fishtown-analytics/dbt/blob/HEAD/CONTRIBUTING.md)
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
|
||||
RELEASE.md
@@ -1,92 +0,0 @@
|
||||
### Release Procedure :shipit:
|
||||
|
||||
#### Branching Strategy
|
||||
|
||||
dbt has three types of branches:
|
||||
|
||||
- **Trunks** track the latest release of a minor version of dbt. Historically, we used the `master` branch as the trunk. Each minor version release has a corresponding trunk. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of dbt.
|
||||
- **Release Branches** track a specific, not yet complete release of dbt. These releases are codenamed since we don't always know what their semantic version will be. Example: `dev/lucretia-mott` became `0.11.1`.
|
||||
- **Feature Branches** track individual features and fixes. On completion they should be merged into a release branch.
|
||||
|
||||
#### Git & PyPI
|
||||
|
||||
1. Update CHANGELOG.md with the most recent changes
|
||||
2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it into the appropriate trunk (`X.X.latest`)
|
||||
3. Bump the version using `bumpversion`:
|
||||
- Dry run first by running `bumpversion --new-version <desired-version> <part>` and checking the diff. If it looks correct, clean up the changes and move on:
|
||||
- Alpha releases: `bumpversion --commit --no-tag --new-version 0.10.2a1 num`
|
||||
- Patch releases: `bumpversion --commit --no-tag --new-version 0.10.2 patch`
|
||||
- Minor releases: `bumpversion --commit --no-tag --new-version 0.11.0 minor`
|
||||
- Major releases: `bumpversion --commit --no-tag --new-version 1.0.0 major`
|
||||
4. (If this is a not a release candidate) Merge to `x.x.latest` and (optionally) `master`.
|
||||
5. Update the default branch to the next dev release branch.
|
||||
6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!!
|
||||
7. Deploy to pypi
|
||||
- `twine upload dist/*`
|
||||
8. Deploy to homebrew (see below)
|
||||
9. Deploy to conda-forge (see below)
|
||||
10. Git release notes (points to changelog)
|
||||
11. Post to slack (point to changelog)
|
||||
|
||||
After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward.
|
||||
|
||||
In some cases, where the branches have diverged wildly, it's ok to skip this step. But this means that the changes you just released won't be included in future releases.
|
||||
|
||||
#### Homebrew Release Process
|
||||
|
||||
1. Clone the `homebrew-dbt` repository:
|
||||
|
||||
```
|
||||
git clone git@github.com:fishtown-analytics/homebrew-dbt.git
|
||||
```
|
||||
|
||||
2. For ALL releases (prereleases and version releases), copy the relevant formula. To copy from the latest version release of dbt, do:
|
||||
|
||||
```bash
|
||||
cp Formula/dbt.rb Formula/dbt@{NEW-VERSION}.rb
|
||||
```
|
||||
|
||||
To copy from a different version, simply copy the corresponding file.
|
||||
|
||||
3. Open the file, and edit the following:
|
||||
- the name of the ruby class: this is important, homebrew won't function properly if the class name is wrong. Check historical versions to figure out the right name.
|
||||
- under the `bottle` section, remove all of the hashes (lines starting with `sha256`)
|
||||
|
||||
4. Create a **Python 3.7** virtualenv, activate it, and then install two packages: `homebrew-pypi-poet`, and the version of dbt you are preparing. I use:
|
||||
|
||||
```
|
||||
pyenv virtualenv 3.7.0 homebrew-dbt-{VERSION}
|
||||
pyenv activate homebrew-dbt-{VERSION}
|
||||
pip install dbt=={VERSION} homebrew-pypi-poet
|
||||
```
|
||||
|
||||
homebrew-pypi-poet is a program that generates a valid homebrew formula for an installed pip package. You want to use it to generate a diff against the existing formula. Then you want to apply the diff for the dependency packages only -- e.g. it will tell you that `google-api-core` has been updated and that you need to use the latest version.
|
||||
|
||||
5. reinstall, test, and audit dbt. if the test or audit fails, fix the formula with step 1.
|
||||
|
||||
```bash
|
||||
brew uninstall --force Formula/{YOUR-FILE}.rb
|
||||
brew install Formula/{YOUR-FILE}.rb
|
||||
brew test dbt
|
||||
brew audit --strict dbt
|
||||
```
|
||||
|
||||
6. Ask Connor to bottle the change (only his laptop can do it!)
|
||||
|
||||
#### Conda Forge Release Process
|
||||
|
||||
1. Clone the fork of `conda-forge/dbt-feedstock` [here](https://github.com/fishtown-analytics/dbt-feedstock)
|
||||
```bash
|
||||
git clone git@github.com:fishtown-analytics/dbt-feedstock.git
|
||||
|
||||
```
|
||||
2. Update the version and sha256 in `recipe/meta.yml`. To calculate the sha256, run:
|
||||
|
||||
```bash
|
||||
wget https://github.com/fishtown-analytics/dbt/archive/v{version}.tar.gz
|
||||
openssl sha256 v{version}.tar.gz
|
||||
```
|
||||
|
||||
3. Push the changes and create a PR against `conda-forge/dbt-feedstock`
|
||||
|
||||
4. Confirm that all automated conda-forge tests are passing
|
||||
@@ -6,8 +6,8 @@
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
- dev/*
|
||||
- develop
|
||||
- '*.latest'
|
||||
- pr/*
|
||||
|
||||
jobs:
|
||||
@@ -23,7 +23,7 @@ jobs:
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-unit
|
||||
- script: python -m tox -e py -- -v
|
||||
displayName: Run unit tests
|
||||
|
||||
- job: PostgresIntegrationTest
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-postgres
|
||||
- script: python -m tox -e py-postgres -- -v -n4
|
||||
displayName: Run integration tests
|
||||
|
||||
# These three are all similar except secure environment variables, which MUST be passed along to their tasks,
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-snowflake
|
||||
- script: python -m tox -e py-snowflake -- -v -n4
|
||||
env:
|
||||
SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT)
|
||||
SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD)
|
||||
@@ -96,7 +96,7 @@ jobs:
|
||||
architecture: 'x64'
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
- script: python -m tox -e pywin-bigquery
|
||||
- script: python -m tox -e py-bigquery -- -v -n4
|
||||
env:
|
||||
BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON)
|
||||
displayName: Run integration tests
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-redshift
|
||||
- script: python -m tox -e py-redshift -- -v -n4
|
||||
env:
|
||||
REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME)
|
||||
REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS)
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
inputs:
|
||||
versionSpec: '3.7'
|
||||
architecture: 'x64'
|
||||
- script: python -m pip install --upgrade pip setuptools && python -m pip install -r requirements.txt && python -m pip install -r dev_requirements.txt
|
||||
- script: python -m pip install --upgrade pip setuptools && python -m pip install -r requirements.txt && python -m pip install -r dev-requirements.txt
|
||||
displayName: Install dependencies
|
||||
- task: ShellScript@2
|
||||
inputs:
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.exceptions import RuntimeException
|
||||
|
||||
from typing import Dict, ClassVar, Any, Optional
|
||||
|
||||
from dbt.exceptions import RuntimeException
|
||||
|
||||
|
||||
@dataclass
|
||||
class Column(JsonSchemaMixin):
|
||||
class Column:
|
||||
TYPE_LABELS: ClassVar[Dict[str, str]] = {
|
||||
'STRING': 'TEXT',
|
||||
'TIMESTAMP': 'TIMESTAMP',
|
||||
|
||||
@@ -4,14 +4,15 @@ import os
|
||||
from multiprocessing.synchronize import RLock
|
||||
from threading import get_ident
|
||||
from typing import (
|
||||
Dict, Tuple, Hashable, Optional, ContextManager, List
|
||||
Dict, Tuple, Hashable, Optional, ContextManager, List, Union
|
||||
)
|
||||
|
||||
import agate
|
||||
|
||||
import dbt.exceptions
|
||||
from dbt.contracts.connection import (
|
||||
Connection, Identifier, ConnectionState, AdapterRequiredConfig, LazyHandle
|
||||
Connection, Identifier, ConnectionState,
|
||||
AdapterRequiredConfig, LazyHandle, AdapterResponse
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.adapters.base.query_headers import (
|
||||
@@ -290,7 +291,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
|
||||
@abc.abstractmethod
|
||||
def execute(
|
||||
self, sql: str, auto_begin: bool = False, fetch: bool = False
|
||||
) -> Tuple[str, agate.Table]:
|
||||
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
|
||||
"""Execute the given SQL.
|
||||
|
||||
:param str sql: The sql to execute.
|
||||
@@ -298,7 +299,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
|
||||
transaction, automatically begin one.
|
||||
:param bool fetch: If set, fetch results.
|
||||
:return: A tuple of the status and the results (empty if fetch=False).
|
||||
:rtype: Tuple[str, agate.Table]
|
||||
:rtype: Tuple[Union[str, AdapterResponse], agate.Table]
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`execute` is not implemented for this adapter!'
|
||||
|
||||
@@ -28,14 +28,14 @@ from dbt.clients.jinja import MacroGenerator
|
||||
from dbt.contracts.graph.compiled import (
|
||||
CompileResultNode, CompiledSeedNode
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.graph.manifest import Manifest, MacroManifest
|
||||
from dbt.contracts.graph.parsed import ParsedSeedNode
|
||||
from dbt.exceptions import warn_or_error
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.utils import filter_null_values, executor
|
||||
|
||||
from dbt.adapters.base.connections import Connection
|
||||
from dbt.adapters.base.connections import Connection, AdapterResponse
|
||||
from dbt.adapters.base.meta import AdapterMeta, available
|
||||
from dbt.adapters.base.relation import (
|
||||
ComponentName, BaseRelation, InformationSchema, SchemaSearchMap
|
||||
@@ -160,7 +160,7 @@ class BaseAdapter(metaclass=AdapterMeta):
|
||||
self.config = config
|
||||
self.cache = RelationsCache()
|
||||
self.connections = self.ConnectionManager(config)
|
||||
self._macro_manifest_lazy: Optional[Manifest] = None
|
||||
self._macro_manifest_lazy: Optional[MacroManifest] = None
|
||||
|
||||
###
|
||||
# Methods that pass through to the connection manager
|
||||
@@ -213,7 +213,7 @@ class BaseAdapter(metaclass=AdapterMeta):
|
||||
@available.parse(lambda *a, **k: ('', empty_table()))
|
||||
def execute(
|
||||
self, sql: str, auto_begin: bool = False, fetch: bool = False
|
||||
) -> Tuple[str, agate.Table]:
|
||||
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
|
||||
"""Execute the given SQL. This is a thin wrapper around
|
||||
ConnectionManager.execute.
|
||||
|
||||
@@ -222,7 +222,7 @@ class BaseAdapter(metaclass=AdapterMeta):
|
||||
transaction, automatically begin one.
|
||||
:param bool fetch: If set, fetch results.
|
||||
:return: A tuple of the status and the results (empty if fetch=False).
|
||||
:rtype: Tuple[str, agate.Table]
|
||||
:rtype: Tuple[Union[str, AdapterResponse], agate.Table]
|
||||
"""
|
||||
return self.connections.execute(
|
||||
sql=sql,
|
||||
@@ -259,22 +259,22 @@ class BaseAdapter(metaclass=AdapterMeta):
|
||||
return cls.ConnectionManager.TYPE
|
||||
|
||||
@property
|
||||
def _macro_manifest(self) -> Manifest:
|
||||
def _macro_manifest(self) -> MacroManifest:
|
||||
if self._macro_manifest_lazy is None:
|
||||
return self.load_macro_manifest()
|
||||
return self._macro_manifest_lazy
|
||||
|
||||
def check_macro_manifest(self) -> Optional[Manifest]:
|
||||
def check_macro_manifest(self) -> Optional[MacroManifest]:
|
||||
"""Return the internal manifest (used for executing macros) if it's
|
||||
been initialized, otherwise return None.
|
||||
"""
|
||||
return self._macro_manifest_lazy
|
||||
|
||||
def load_macro_manifest(self) -> Manifest:
|
||||
def load_macro_manifest(self) -> MacroManifest:
|
||||
if self._macro_manifest_lazy is None:
|
||||
# avoid a circular import
|
||||
from dbt.parser.manifest import load_macro_manifest
|
||||
manifest = load_macro_manifest(
|
||||
from dbt.parser.manifest import ManifestLoader
|
||||
manifest = ManifestLoader.load_macros(
|
||||
self.config, self.connections.set_query_header
|
||||
)
|
||||
self._macro_manifest_lazy = manifest
|
||||
|
||||
@@ -21,8 +21,8 @@ Self = TypeVar('Self', bound='BaseRelation')
|
||||
|
||||
@dataclass(frozen=True, eq=False, repr=False)
|
||||
class BaseRelation(FakeAPIObject, Hashable):
|
||||
type: Optional[RelationType]
|
||||
path: Path
|
||||
type: Optional[RelationType] = None
|
||||
quote_character: str = '"'
|
||||
include_policy: Policy = Policy()
|
||||
quote_policy: Policy = Policy()
|
||||
@@ -45,7 +45,7 @@ class BaseRelation(FakeAPIObject, Hashable):
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, self.__class__):
|
||||
return False
|
||||
return self.to_dict() == other.to_dict()
|
||||
return self.to_dict(omit_none=True) == other.to_dict(omit_none=True)
|
||||
|
||||
@classmethod
|
||||
def get_default_quote_policy(cls) -> Policy:
|
||||
@@ -185,10 +185,10 @@ class BaseRelation(FakeAPIObject, Hashable):
|
||||
def create_from_source(
|
||||
cls: Type[Self], source: ParsedSourceDefinition, **kwargs: Any
|
||||
) -> Self:
|
||||
source_quoting = source.quoting.to_dict()
|
||||
source_quoting = source.quoting.to_dict(omit_none=True)
|
||||
source_quoting.pop('column', None)
|
||||
quote_policy = deep_merge(
|
||||
cls.get_default_quote_policy().to_dict(),
|
||||
cls.get_default_quote_policy().to_dict(omit_none=True),
|
||||
source_quoting,
|
||||
kwargs.get('quote_policy', {}),
|
||||
)
|
||||
@@ -203,7 +203,7 @@ class BaseRelation(FakeAPIObject, Hashable):
|
||||
|
||||
@staticmethod
|
||||
def add_ephemeral_prefix(name: str):
|
||||
return f'__dbt__CTE__{name}'
|
||||
return f'__dbt__cte__{name}'
|
||||
|
||||
@classmethod
|
||||
def create_ephemeral_from_node(
|
||||
|
||||
@@ -7,7 +7,9 @@ from typing_extensions import Protocol
|
||||
|
||||
import agate
|
||||
|
||||
from dbt.contracts.connection import Connection, AdapterRequiredConfig
|
||||
from dbt.contracts.connection import (
|
||||
Connection, AdapterRequiredConfig, AdapterResponse
|
||||
)
|
||||
from dbt.contracts.graph.compiled import (
|
||||
CompiledNode, ManifestNode, NonSourceCompiledNode
|
||||
)
|
||||
@@ -154,7 +156,7 @@ class AdapterProtocol(
|
||||
|
||||
def execute(
|
||||
self, sql: str, auto_begin: bool = False, fetch: bool = False
|
||||
) -> Tuple[str, agate.Table]:
|
||||
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
|
||||
...
|
||||
|
||||
def get_compiler(self) -> Compiler_T:
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
import abc
|
||||
import time
|
||||
from typing import List, Optional, Tuple, Any, Iterable, Dict
|
||||
from typing import List, Optional, Tuple, Any, Iterable, Dict, Union
|
||||
|
||||
import agate
|
||||
|
||||
import dbt.clients.agate_helper
|
||||
import dbt.exceptions
|
||||
from dbt.adapters.base import BaseConnectionManager
|
||||
from dbt.contracts.connection import Connection, ConnectionState
|
||||
from dbt.contracts.connection import (
|
||||
Connection, ConnectionState, AdapterResponse
|
||||
)
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt import flags
|
||||
|
||||
@@ -18,7 +20,7 @@ class SQLConnectionManager(BaseConnectionManager):
|
||||
Methods to implement:
|
||||
- exception_handler
|
||||
- cancel
|
||||
- get_status
|
||||
- get_response
|
||||
- open
|
||||
"""
|
||||
@abc.abstractmethod
|
||||
@@ -76,20 +78,19 @@ class SQLConnectionManager(BaseConnectionManager):
|
||||
|
||||
cursor = connection.handle.cursor()
|
||||
cursor.execute(sql, bindings)
|
||||
|
||||
logger.debug(
|
||||
"SQL status: {status} in {elapsed:0.2f} seconds",
|
||||
status=self.get_status(cursor),
|
||||
status=self.get_response(cursor),
|
||||
elapsed=(time.time() - pre)
|
||||
)
|
||||
|
||||
return connection, cursor
|
||||
|
||||
@abc.abstractclassmethod
|
||||
def get_status(cls, cursor: Any) -> str:
|
||||
def get_response(cls, cursor: Any) -> Union[AdapterResponse, str]:
|
||||
"""Get the status of the cursor."""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`get_status` is not implemented for this adapter!'
|
||||
'`get_response` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -98,7 +99,14 @@ class SQLConnectionManager(BaseConnectionManager):
|
||||
column_names: Iterable[str],
|
||||
rows: Iterable[Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
|
||||
unique_col_names = dict()
|
||||
for idx in range(len(column_names)):
|
||||
col_name = column_names[idx]
|
||||
if col_name in unique_col_names:
|
||||
unique_col_names[col_name] += 1
|
||||
column_names[idx] = f'{col_name}_{unique_col_names[col_name]}'
|
||||
else:
|
||||
unique_col_names[column_names[idx]] = 1
|
||||
return [dict(zip(column_names, row)) for row in rows]
|
||||
|
||||
@classmethod
|
||||
@@ -118,15 +126,15 @@ class SQLConnectionManager(BaseConnectionManager):
|
||||
|
||||
def execute(
|
||||
self, sql: str, auto_begin: bool = False, fetch: bool = False
|
||||
) -> Tuple[str, agate.Table]:
|
||||
) -> Tuple[Union[AdapterResponse, str], agate.Table]:
|
||||
sql = self._add_query_comment(sql)
|
||||
_, cursor = self.add_query(sql, auto_begin)
|
||||
status = self.get_status(cursor)
|
||||
response = self.get_response(cursor)
|
||||
if fetch:
|
||||
table = self.get_result_from_cursor(cursor)
|
||||
else:
|
||||
table = dbt.clients.agate_helper.empty_table()
|
||||
return status, table
|
||||
return response, table
|
||||
|
||||
def add_begin_query(self):
|
||||
return self.add_query('BEGIN', auto_begin=False)
|
||||
|
||||
@@ -6,11 +6,19 @@ from dbt.logger import GLOBAL_LOGGER as logger
|
||||
import dbt.exceptions
|
||||
|
||||
|
||||
def clone(repo, cwd, dirname=None, remove_git_dir=False, branch=None):
|
||||
def _is_commit(revision: str) -> bool:
|
||||
# match SHA-1 git commit
|
||||
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))
|
||||
|
||||
|
||||
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):
|
||||
has_revision = revision is not None
|
||||
is_commit = _is_commit(revision or "")
|
||||
|
||||
clone_cmd = ['git', 'clone', '--depth', '1']
|
||||
|
||||
if branch is not None:
|
||||
clone_cmd.extend(['--branch', branch])
|
||||
if has_revision and not is_commit:
|
||||
clone_cmd.extend(['--branch', revision])
|
||||
|
||||
clone_cmd.append(repo)
|
||||
|
||||
@@ -31,33 +39,38 @@ def list_tags(cwd):
|
||||
return tags
|
||||
|
||||
|
||||
def _checkout(cwd, repo, branch):
|
||||
logger.debug(' Checking out branch {}.'.format(branch))
|
||||
def _checkout(cwd, repo, revision):
|
||||
logger.debug(' Checking out revision {}.'.format(revision))
|
||||
|
||||
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch])
|
||||
run_cmd(cwd, ['git', 'fetch', '--tags', '--depth', '1', 'origin', branch])
|
||||
fetch_cmd = ["git", "fetch", "origin", "--depth", "1"]
|
||||
|
||||
tags = list_tags(cwd)
|
||||
|
||||
# Prefer tags to branches if one exists
|
||||
if branch in tags:
|
||||
spec = 'tags/{}'.format(branch)
|
||||
if _is_commit(revision):
|
||||
run_cmd(cwd, fetch_cmd + [revision])
|
||||
else:
|
||||
spec = 'origin/{}'.format(branch)
|
||||
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', revision])
|
||||
run_cmd(cwd, fetch_cmd + ["--tags", revision])
|
||||
|
||||
if _is_commit(revision):
|
||||
spec = revision
|
||||
# Prefer tags to branches if one exists
|
||||
elif revision in list_tags(cwd):
|
||||
spec = 'tags/{}'.format(revision)
|
||||
else:
|
||||
spec = 'origin/{}'.format(revision)
|
||||
|
||||
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec],
|
||||
env={'LC_ALL': 'C'})
|
||||
return out, err
|
||||
|
||||
|
||||
def checkout(cwd, repo, branch=None):
|
||||
if branch is None:
|
||||
branch = 'master'
|
||||
def checkout(cwd, repo, revision=None):
|
||||
if revision is None:
|
||||
revision = 'HEAD'
|
||||
try:
|
||||
return _checkout(cwd, repo, branch)
|
||||
return _checkout(cwd, repo, revision)
|
||||
except dbt.exceptions.CommandResultError as exc:
|
||||
stderr = exc.stderr.decode('utf-8').strip()
|
||||
dbt.exceptions.bad_package_spec(repo, branch, stderr)
|
||||
dbt.exceptions.bad_package_spec(repo, revision, stderr)
|
||||
|
||||
|
||||
def get_current_sha(cwd):
|
||||
@@ -71,7 +84,7 @@ def remove_remote(cwd):
|
||||
|
||||
|
||||
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
|
||||
branch=None):
|
||||
revision=None):
|
||||
exists = None
|
||||
try:
|
||||
_, err = clone(repo, cwd, dirname=dirname,
|
||||
@@ -97,7 +110,7 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
|
||||
logger.debug('Pulling new dependency {}.', directory)
|
||||
full_path = os.path.join(cwd, directory)
|
||||
start_sha = get_current_sha(full_path)
|
||||
checkout(full_path, repo, branch)
|
||||
checkout(full_path, repo, revision)
|
||||
end_sha = get_current_sha(full_path)
|
||||
if exists:
|
||||
if start_sha == end_sha:
|
||||
|
||||
@@ -21,7 +21,7 @@ import jinja2.sandbox
|
||||
|
||||
from dbt.utils import (
|
||||
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
|
||||
deep_map
|
||||
get_test_macro_name, deep_map
|
||||
)
|
||||
|
||||
from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
|
||||
@@ -231,6 +231,7 @@ class BaseMacroGenerator:
|
||||
template = self.get_template()
|
||||
# make the module. previously we set both vars and local, but that's
|
||||
# redundant: They both end up in the same place
|
||||
# make_module is in jinja2.environment. It returns a TemplateModule
|
||||
module = template.make_module(vars=self.context, shared=False)
|
||||
macro = module.__dict__[get_dbt_macro_name(name)]
|
||||
module.__dict__.update(self.context)
|
||||
@@ -244,6 +245,7 @@ class BaseMacroGenerator:
|
||||
raise_compiler_error(str(e))
|
||||
|
||||
def call_macro(self, *args, **kwargs):
|
||||
# called from __call__ methods
|
||||
if self.context is None:
|
||||
raise InternalException(
|
||||
'Context is still None in call_macro!'
|
||||
@@ -306,8 +308,10 @@ class MacroGenerator(BaseMacroGenerator):
|
||||
e.stack.append(self.macro)
|
||||
raise e
|
||||
|
||||
# This adds the macro's unique id to the node's 'depends_on'
|
||||
@contextmanager
|
||||
def track_call(self):
|
||||
# This is only called from __call__
|
||||
if self.stack is None or self.node is None:
|
||||
yield
|
||||
else:
|
||||
@@ -322,6 +326,7 @@ class MacroGenerator(BaseMacroGenerator):
|
||||
finally:
|
||||
self.stack.pop(unique_id)
|
||||
|
||||
# this makes MacroGenerator objects callable like functions
|
||||
def __call__(self, *args, **kwargs):
|
||||
with self.track_call():
|
||||
return self.call_macro(*args, **kwargs)
|
||||
@@ -403,6 +408,21 @@ class DocumentationExtension(jinja2.ext.Extension):
|
||||
return node
|
||||
|
||||
|
||||
class TestExtension(jinja2.ext.Extension):
|
||||
tags = ['test']
|
||||
|
||||
def parse(self, parser):
|
||||
node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
|
||||
test_name = parser.parse_assign_target(name_only=True).name
|
||||
|
||||
parser.parse_signature(node)
|
||||
node.defaults = []
|
||||
node.name = get_test_macro_name(test_name)
|
||||
node.body = parser.parse_statements(('name:endtest',),
|
||||
drop_needle=True)
|
||||
return node
|
||||
|
||||
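To make the new TestExtension concrete, here is a self-contained sketch with the same shape: a custom block tag is parsed into an ordinary Jinja macro under a prefixed name. The `get_test_macro_name` stub and the `test_` prefix are assumptions for illustration; dbt's real naming helper lives in dbt.utils.

```python
import jinja2
import jinja2.ext
import jinja2.nodes


def get_test_macro_name(name: str) -> str:
    # Stand-in for dbt's helper; the actual prefix is an implementation detail.
    return 'test_' + name


class TestExtension(jinja2.ext.Extension):
    # A {% test foo(...) %} ... {% endtest %} block becomes a macro node.
    tags = {'test'}

    def parse(self, parser):
        node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
        test_name = parser.parse_assign_target(name_only=True).name
        parser.parse_signature(node)          # fills in args and defaults
        node.name = get_test_macro_name(test_name)
        node.body = parser.parse_statements(('name:endtest',), drop_needle=True)
        return node


env = jinja2.Environment(extensions=[TestExtension])
tmpl = env.from_string(
    "{% test not_null(column) %}select * from t where {{ column }} is null{% endtest %}"
    "{{ test_not_null('id') }}"
)
print(tmpl.render())  # -> select * from t where id is null
```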
|
||||
def _is_dunder_name(name):
|
||||
return name.startswith('__') and name.endswith('__')
|
||||
|
||||
@@ -474,6 +494,7 @@ def get_environment(
|
||||
|
||||
args['extensions'].append(MaterializationExtension)
|
||||
args['extensions'].append(DocumentationExtension)
|
||||
args['extensions'].append(TestExtension)
|
||||
|
||||
env_cls: Type[jinja2.Environment]
|
||||
text_filter: Type
|
||||
@@ -642,3 +663,39 @@ def add_rendered_test_kwargs(

    kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
    context[SCHEMA_TEST_KWARGS_NAME] = kwargs


def statically_extract_macro_calls(string, ctx):
    # set 'capture_macros' to capture undefined
    env = get_environment(None, capture_macros=True)
    parsed = env.parse(string)

    standard_calls = {
        'source': [],
        'ref': [],
        'config': [],
    }

    possible_macro_calls = []
    for func_call in parsed.find_all(jinja2.nodes.Call):
        if hasattr(func_call, 'node') and hasattr(func_call.node, 'name'):
            func_name = func_call.node.name
        else:
            # This is a kludge to capture an adapter.dispatch('<macro_name>') call.
            # Call(node=Getattr(
            #   node=Name(name='adapter', ctx='load'), attr='dispatch', ctx='load'),
            #   args=[Const(value='get_snapshot_unique_id')], kwargs=[],
            #   dyn_args=None, dyn_kwargs=None)
            if (hasattr(func_call, 'node') and hasattr(func_call.node, 'attr') and
                    func_call.node.attr == 'dispatch'):
                func_name = func_call.args[0].value
            else:
                continue
        if func_name in standard_calls:
            continue
        elif ctx.get(func_name):
            continue
        else:
            possible_macro_calls.append(func_name)

    return possible_macro_calls

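A usage sketch of the same scanning idea, written against a stock Jinja environment rather than dbt's get_environment(); the `known_names` set plays the role of ctx, and the template text is invented:

```python
import jinja2
import jinja2.nodes


def extract_possible_macro_calls(source: str, known_names: set) -> list:
    # Parse without rendering and collect call names that are not the
    # standard ref/source/config calls and not already in the context.
    env = jinja2.Environment()
    parsed = env.parse(source)
    standard_calls = {'source', 'ref', 'config'}

    calls = []
    for func_call in parsed.find_all(jinja2.nodes.Call):
        if hasattr(func_call.node, 'name'):
            func_name = func_call.node.name
        elif getattr(func_call.node, 'attr', None) == 'dispatch':
            func_name = func_call.args[0].value
        else:
            continue
        if func_name in standard_calls or func_name in known_names:
            continue
        calls.append(func_name)
    return calls


sql = "select * from {{ ref('users') }} where {{ is_recent('created_at') }}"
print(extract_possible_macro_calls(sql, known_names=set()))  # ['is_recent']
```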
@@ -438,7 +438,9 @@ def run_cmd(
|
||||
return out, err
|
||||
|
||||
|
||||
def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
|
||||
def download(
|
||||
url: str, path: str, timeout: Optional[Union[float, tuple]] = None
|
||||
) -> None:
|
||||
path = convert_path(path)
|
||||
connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
|
||||
response = requests.get(url, timeout=connection_timeout)
|
||||
|
||||
@@ -1,16 +1,19 @@
from typing import Any

import dbt.exceptions

import yaml
import yaml.scanner

# the C version is faster, but it doesn't always exist
YamlLoader: Any
try:
    from yaml import CSafeLoader as YamlLoader
    from yaml import (
        CLoader as Loader,
        CSafeLoader as SafeLoader,
        CDumper as Dumper
    )
except ImportError:
    from yaml import SafeLoader as YamlLoader
    from yaml import (  # type: ignore  # noqa: F401
        Loader, SafeLoader, Dumper
    )


YAML_ERROR_MESSAGE = """
@@ -54,7 +57,7 @@ def contextualized_yaml_error(raw_contents, error):


def safe_load(contents):
    return yaml.load(contents, Loader=YamlLoader)
    return yaml.load(contents, Loader=SafeLoader)


def load_yaml_text(contents):
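The change above keeps the long-standing pattern of preferring PyYAML's libyaml-backed loaders when they are installed. A minimal sketch of that fallback, independent of dbt:

```python
from typing import Any

import yaml

# Prefer the C-accelerated loader when libyaml is available; fall back to the
# pure-Python SafeLoader otherwise. Behaviour is identical, only speed differs.
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader  # type: ignore


def safe_load(contents: str) -> Any:
    return yaml.load(contents, Loader=SafeLoader)


print(safe_load("models:\n  - name: users"))
# -> {'models': [{'name': 'users'}]}
```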
@@ -12,7 +12,6 @@ from dbt.clients.system import make_directory
|
||||
from dbt.context.providers import generate_runtime_model
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.graph.compiled import (
|
||||
CompiledDataTestNode,
|
||||
CompiledSchemaTestNode,
|
||||
COMPILED_TYPES,
|
||||
GraphMemberNode,
|
||||
@@ -30,6 +29,7 @@ from dbt.graph import Graph
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.utils import pluralize
|
||||
import dbt.tracking
|
||||
|
||||
graph_file_name = 'graph.gpickle'
|
||||
|
||||
@@ -52,11 +52,17 @@ def print_compile_stats(stats):
|
||||
NodeType.Operation: 'operation',
|
||||
NodeType.Seed: 'seed file',
|
||||
NodeType.Source: 'source',
|
||||
NodeType.Exposure: 'exposure',
|
||||
}
|
||||
|
||||
results = {k: 0 for k in names.keys()}
|
||||
results.update(stats)
|
||||
|
||||
# create tracking event for resource_counts
|
||||
if dbt.tracking.active_user is not None:
|
||||
resource_counts = {k.pluralize(): v for k, v in results.items()}
|
||||
dbt.tracking.track_resource_counts(resource_counts)
|
||||
|
||||
stat_line = ", ".join([
|
||||
pluralize(ct, names.get(t)) for t, ct in results.items()
|
||||
if t in names
|
||||
@@ -81,6 +87,8 @@ def _generate_stats(manifest: Manifest):
|
||||
|
||||
for source in manifest.sources.values():
|
||||
stats[source.resource_type] += 1
|
||||
for exposure in manifest.exposures.values():
|
||||
stats[exposure.resource_type] += 1
|
||||
for macro in manifest.macros.values():
|
||||
stats[macro.resource_type] += 1
|
||||
return stats
|
||||
@@ -135,7 +143,7 @@ class Linker:
|
||||
"""
|
||||
out_graph = self.graph.copy()
|
||||
for node_id in self.graph.nodes():
|
||||
data = manifest.expect(node_id).to_dict()
|
||||
data = manifest.expect(node_id).to_dict(omit_none=True)
|
||||
out_graph.add_node(node_id, **data)
|
||||
nx.write_gpickle(out_graph, outfile)
|
||||
|
||||
@@ -188,11 +196,11 @@ class Compiler:
|
||||
[
|
||||
InjectedCTE(
|
||||
id="cte_id_1",
|
||||
sql="__dbt__CTE__ephemeral as (select * from table)",
|
||||
sql="__dbt__cte__ephemeral as (select * from table)",
|
||||
),
|
||||
InjectedCTE(
|
||||
id="cte_id_2",
|
||||
sql="__dbt__CTE__events as (select id, type from events)",
|
||||
sql="__dbt__cte__events as (select id, type from events)",
|
||||
),
|
||||
]
|
||||
|
||||
@@ -203,8 +211,8 @@ class Compiler:
|
||||
|
||||
This will spit out:
|
||||
|
||||
"with __dbt__CTE__ephemeral as (select * from table),
|
||||
__dbt__CTE__events as (select id, type from events),
|
||||
"with __dbt__cte__ephemeral as (select * from table),
|
||||
__dbt__cte__events as (select id, type from events),
|
||||
with internal_cte as (select * from sessions)
|
||||
select * from internal_cte"
|
||||
|
||||
@@ -242,9 +250,6 @@ class Compiler:
|
||||
|
||||
return str(parsed)
|
||||
|
||||
def _get_dbt_test_name(self) -> str:
|
||||
return 'dbt__CTE__INTERNAL_test'
|
||||
|
||||
# This method is called by the 'compile_node' method. Starting
|
||||
# from the node that it is passed in, it will recursively call
|
||||
# itself using the 'extra_ctes'. The 'ephemeral' models do
|
||||
@@ -275,55 +280,49 @@ class Compiler:
|
||||
# gathered and then "injected" into the model.
|
||||
prepended_ctes: List[InjectedCTE] = []
|
||||
|
||||
dbt_test_name = self._get_dbt_test_name()
|
||||
|
||||
# extra_ctes are added to the model by
|
||||
# RuntimeRefResolver.create_relation, which adds an
|
||||
# extra_cte for every model relation which is an
|
||||
# ephemeral model.
|
||||
for cte in model.extra_ctes:
|
||||
if cte.id == dbt_test_name:
|
||||
sql = cte.sql
|
||||
if cte.id not in manifest.nodes:
|
||||
raise InternalException(
|
||||
f'During compilation, found a cte reference that '
|
||||
f'could not be resolved: {cte.id}'
|
||||
)
|
||||
cte_model = manifest.nodes[cte.id]
|
||||
|
||||
if not cte_model.is_ephemeral_model:
|
||||
raise InternalException(f'{cte.id} is not ephemeral')
|
||||
|
||||
# This model has already been compiled, so it's been
|
||||
# through here before
|
||||
if getattr(cte_model, 'compiled', False):
|
||||
assert isinstance(cte_model, tuple(COMPILED_TYPES.values()))
|
||||
cte_model = cast(NonSourceCompiledNode, cte_model)
|
||||
new_prepended_ctes = cte_model.extra_ctes
|
||||
|
||||
# if the cte_model isn't compiled, i.e. first time here
|
||||
else:
|
||||
if cte.id not in manifest.nodes:
|
||||
raise InternalException(
|
||||
f'During compilation, found a cte reference that '
|
||||
f'could not be resolved: {cte.id}'
|
||||
# This is an ephemeral parsed model that we can compile.
|
||||
# Compile and update the node
|
||||
cte_model = self._compile_node(
|
||||
cte_model, manifest, extra_context)
|
||||
# recursively call this method
|
||||
cte_model, new_prepended_ctes = \
|
||||
self._recursively_prepend_ctes(
|
||||
cte_model, manifest, extra_context
|
||||
)
|
||||
cte_model = manifest.nodes[cte.id]
|
||||
# Save compiled SQL file and sync manifest
|
||||
self._write_node(cte_model)
|
||||
manifest.sync_update_node(cte_model)
|
||||
|
||||
if not cte_model.is_ephemeral_model:
|
||||
raise InternalException(f'{cte.id} is not ephemeral')
|
||||
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
|
||||
|
||||
# This model has already been compiled, so it's been
|
||||
# through here before
|
||||
if getattr(cte_model, 'compiled', False):
|
||||
assert isinstance(cte_model,
|
||||
tuple(COMPILED_TYPES.values()))
|
||||
cte_model = cast(NonSourceCompiledNode, cte_model)
|
||||
new_prepended_ctes = cte_model.extra_ctes
|
||||
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
|
||||
sql = f' {new_cte_name} as (\n{cte_model.compiled_sql}\n)'
|
||||
|
||||
# if the cte_model isn't compiled, i.e. first time here
|
||||
else:
|
||||
# This is an ephemeral parsed model that we can compile.
|
||||
# Compile and update the node
|
||||
cte_model = self._compile_node(
|
||||
cte_model, manifest, extra_context)
|
||||
# recursively call this method
|
||||
cte_model, new_prepended_ctes = \
|
||||
self._recursively_prepend_ctes(
|
||||
cte_model, manifest, extra_context
|
||||
)
|
||||
# Save compiled SQL file and sync manifest
|
||||
self._write_node(cte_model)
|
||||
manifest.sync_update_node(cte_model)
|
||||
|
||||
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
|
||||
|
||||
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
|
||||
sql = f' {new_cte_name} as (\n{cte_model.compiled_sql}\n)'
|
||||
|
||||
_add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
|
||||
_add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
|
||||
|
||||
# We don't save injected_sql into compiled sql for ephemeral models
|
||||
# because it will cause problems with processing of subsequent models.
|
||||
@@ -336,40 +335,12 @@ class Compiler:
|
||||
model.compiled_sql = injected_sql
|
||||
model.extra_ctes_injected = True
|
||||
model.extra_ctes = prepended_ctes
|
||||
model.validate(model.to_dict())
|
||||
model.validate(model.to_dict(omit_none=True))
|
||||
|
||||
manifest.update_node(model)
|
||||
|
||||
return model, prepended_ctes
|
||||
|
||||
def _add_ctes(
|
||||
self,
|
||||
compiled_node: NonSourceCompiledNode,
|
||||
manifest: Manifest,
|
||||
extra_context: Dict[str, Any],
|
||||
) -> NonSourceCompiledNode:
|
||||
"""Insert the CTEs for the model."""
|
||||
|
||||
# for data tests, we need to insert a special CTE at the end of the
|
||||
# list containing the test query, and then have the "real" query be a
|
||||
# select count(*) from that model.
|
||||
# the benefit of doing it this way is that _insert_ctes() can be
|
||||
# rewritten for different adapters to handle databses that don't
|
||||
# support CTEs, or at least don't have full support.
|
||||
if isinstance(compiled_node, CompiledDataTestNode):
|
||||
# the last prepend (so last in order) should be the data test body.
|
||||
# then we can add our select count(*) from _that_ cte as the "real"
|
||||
# compiled_sql, and do the regular prepend logic from CTEs.
|
||||
name = self._get_dbt_test_name()
|
||||
cte = InjectedCTE(
|
||||
id=name,
|
||||
sql=f' {name} as (\n{compiled_node.compiled_sql}\n)'
|
||||
)
|
||||
compiled_node.extra_ctes.append(cte)
|
||||
compiled_node.compiled_sql = f'\nselect count(*) from {name}'
|
||||
|
||||
return compiled_node
|
||||
|
||||
# creates a compiled_node from the ManifestNode passed in,
|
||||
# creates a "context" dictionary for jinja rendering,
|
||||
# and then renders the "compiled_sql" using the node, the
|
||||
@@ -385,7 +356,7 @@ class Compiler:
|
||||
|
||||
logger.debug("Compiling {}".format(node.unique_id))
|
||||
|
||||
data = node.to_dict()
|
||||
data = node.to_dict(omit_none=True)
|
||||
data.update({
|
||||
'compiled': False,
|
||||
'compiled_sql': None,
|
||||
@@ -408,12 +379,6 @@ class Compiler:
|
||||
|
||||
compiled_node.compiled = True
|
||||
|
||||
# add ctes for specific test nodes, and also for
|
||||
# possible future use in adapters
|
||||
compiled_node = self._add_ctes(
|
||||
compiled_node, manifest, extra_context
|
||||
)
|
||||
|
||||
return compiled_node
|
||||
|
||||
def write_graph_file(self, linker: Linker, manifest: Manifest):
|
||||
|
||||
@@ -2,7 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
import os
|
||||
|
||||
from hologram import ValidationError
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
|
||||
from dbt.clients.system import load_file_contents
|
||||
from dbt.clients.yaml_helper import load_yaml_text
|
||||
@@ -75,6 +75,7 @@ def read_user_config(directory: str) -> UserConfig:
|
||||
if profile:
|
||||
user_cfg = coerce_dict_str(profile.get('config', {}))
|
||||
if user_cfg is not None:
|
||||
UserConfig.validate(user_cfg)
|
||||
return UserConfig.from_dict(user_cfg)
|
||||
except (RuntimeException, ValidationError):
|
||||
pass
|
||||
@@ -110,8 +111,8 @@ class Profile(HasCredentials):
|
||||
'credentials': self.credentials,
|
||||
}
|
||||
if serialize_credentials:
|
||||
result['config'] = self.config.to_dict()
|
||||
result['credentials'] = self.credentials.to_dict()
|
||||
result['config'] = self.config.to_dict(omit_none=True)
|
||||
result['credentials'] = self.credentials.to_dict(omit_none=True)
|
||||
return result
|
||||
|
||||
def to_target_dict(self) -> Dict[str, Any]:
|
||||
@@ -124,7 +125,7 @@ class Profile(HasCredentials):
|
||||
'name': self.target_name,
|
||||
'target_name': self.target_name,
|
||||
'profile_name': self.profile_name,
|
||||
'config': self.config.to_dict(),
|
||||
'config': self.config.to_dict(omit_none=True),
|
||||
})
|
||||
return target
|
||||
|
||||
@@ -137,10 +138,10 @@ class Profile(HasCredentials):
|
||||
def validate(self):
|
||||
try:
|
||||
if self.credentials:
|
||||
self.credentials.to_dict(validate=True)
|
||||
ProfileConfig.from_dict(
|
||||
self.to_profile_info(serialize_credentials=True)
|
||||
)
|
||||
dct = self.credentials.to_dict(omit_none=True)
|
||||
self.credentials.validate(dct)
|
||||
dct = self.to_profile_info(serialize_credentials=True)
|
||||
ProfileConfig.validate(dct)
|
||||
except ValidationError as exc:
|
||||
raise DbtProfileError(validator_error_message(exc)) from exc
|
||||
|
||||
@@ -160,7 +161,9 @@ class Profile(HasCredentials):
|
||||
typename = profile.pop('type')
|
||||
try:
|
||||
cls = load_plugin(typename)
|
||||
credentials = cls.from_dict(profile)
|
||||
data = cls.translate_aliases(profile)
|
||||
cls.validate(data)
|
||||
credentials = cls.from_dict(data)
|
||||
except (RuntimeException, ValidationError) as e:
|
||||
msg = str(e) if isinstance(e, RuntimeException) else e.message
|
||||
raise DbtProfileError(
|
||||
@@ -233,6 +236,7 @@ class Profile(HasCredentials):
|
||||
"""
|
||||
if user_cfg is None:
|
||||
user_cfg = {}
|
||||
UserConfig.validate(user_cfg)
|
||||
config = UserConfig.from_dict(user_cfg)
|
||||
|
||||
profile = cls(
|
||||
|
||||
@@ -26,15 +26,12 @@ from dbt.version import get_installed_version
|
||||
from dbt.utils import MultiDict
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.config.selectors import SelectorDict
|
||||
|
||||
from dbt.contracts.project import (
|
||||
Project as ProjectContract,
|
||||
SemverString,
|
||||
)
|
||||
from dbt.contracts.project import PackageConfig
|
||||
|
||||
from hologram import ValidationError
|
||||
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
from .renderer import DbtProjectYamlRenderer
|
||||
from .selectors import (
|
||||
selector_config_from_data,
|
||||
@@ -101,6 +98,7 @@ def package_config_from_data(packages_data: Dict[str, Any]):
|
||||
packages_data = {'packages': []}
|
||||
|
||||
try:
|
||||
PackageConfig.validate(packages_data)
|
||||
packages = PackageConfig.from_dict(packages_data)
|
||||
except ValidationError as e:
|
||||
raise DbtProjectError(
|
||||
@@ -306,7 +304,10 @@ class PartialProject(RenderComponents):
|
||||
)
|
||||
|
||||
try:
|
||||
cfg = ProjectContract.from_dict(rendered.project_dict)
|
||||
ProjectContract.validate(rendered.project_dict)
|
||||
cfg = ProjectContract.from_dict(
|
||||
rendered.project_dict
|
||||
)
|
||||
except ValidationError as e:
|
||||
raise DbtProjectError(validator_error_message(e)) from e
|
||||
# name/version are required in the Project definition, so we can assume
|
||||
@@ -346,18 +347,20 @@ class PartialProject(RenderComponents):
|
||||
# break many things
|
||||
quoting: Dict[str, Any] = {}
|
||||
if cfg.quoting is not None:
|
||||
quoting = cfg.quoting.to_dict()
|
||||
quoting = cfg.quoting.to_dict(omit_none=True)
|
||||
|
||||
models: Dict[str, Any]
|
||||
seeds: Dict[str, Any]
|
||||
snapshots: Dict[str, Any]
|
||||
sources: Dict[str, Any]
|
||||
tests: Dict[str, Any]
|
||||
vars_value: VarProvider
|
||||
|
||||
models = cfg.models
|
||||
seeds = cfg.seeds
|
||||
snapshots = cfg.snapshots
|
||||
sources = cfg.sources
|
||||
tests = cfg.tests
|
||||
if cfg.vars is None:
|
||||
vars_dict: Dict[str, Any] = {}
|
||||
else:
|
||||
@@ -407,6 +410,7 @@ class PartialProject(RenderComponents):
|
||||
selectors=selectors,
|
||||
query_comment=query_comment,
|
||||
sources=sources,
|
||||
tests=tests,
|
||||
vars=vars_value,
|
||||
config_version=cfg.config_version,
|
||||
unrendered=unrendered,
|
||||
@@ -512,6 +516,7 @@ class Project:
|
||||
seeds: Dict[str, Any]
|
||||
snapshots: Dict[str, Any]
|
||||
sources: Dict[str, Any]
|
||||
tests: Dict[str, Any]
|
||||
vars: VarProvider
|
||||
dbt_version: List[VersionSpecifier]
|
||||
packages: Dict[str, Any]
|
||||
@@ -570,6 +575,7 @@ class Project:
|
||||
'seeds': self.seeds,
|
||||
'snapshots': self.snapshots,
|
||||
'sources': self.sources,
|
||||
'tests': self.tests,
|
||||
'vars': self.vars.to_dict(),
|
||||
'require-dbt-version': [
|
||||
v.to_version_string() for v in self.dbt_version
|
||||
@@ -577,16 +583,17 @@ class Project:
|
||||
'config-version': self.config_version,
|
||||
})
|
||||
if self.query_comment:
|
||||
result['query-comment'] = self.query_comment.to_dict()
|
||||
result['query-comment'] = \
|
||||
self.query_comment.to_dict(omit_none=True)
|
||||
|
||||
if with_packages:
|
||||
result.update(self.packages.to_dict())
|
||||
result.update(self.packages.to_dict(omit_none=True))
|
||||
|
||||
return result
|
||||
|
||||
def validate(self):
|
||||
try:
|
||||
ProjectContract.from_dict(self.to_project_config())
|
||||
ProjectContract.validate(self.to_project_config())
|
||||
except ValidationError as e:
|
||||
raise DbtProjectError(validator_error_message(e)) from e
|
||||
|
||||
|
||||
@@ -145,7 +145,7 @@ class DbtProjectYamlRenderer(BaseRenderer):
|
||||
if first == 'vars':
|
||||
return False
|
||||
|
||||
if first in {'seeds', 'models', 'snapshots', 'seeds'}:
|
||||
if first in {'seeds', 'models', 'snapshots', 'tests'}:
|
||||
keypath_parts = {
|
||||
(k.lstrip('+') if isinstance(k, str) else k)
|
||||
for k in keypath
|
||||
|
||||
@@ -33,7 +33,7 @@ from dbt.exceptions import (
|
||||
raise_compiler_error
|
||||
)
|
||||
|
||||
from hologram import ValidationError
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
|
||||
|
||||
def _project_quoting_dict(
|
||||
@@ -78,7 +78,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
get_relation_class_by_name(profile.credentials.type)
|
||||
.get_default_quote_policy()
|
||||
.replace_dict(_project_quoting_dict(project, profile))
|
||||
).to_dict()
|
||||
).to_dict(omit_none=True)
|
||||
|
||||
cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}'))
|
||||
|
||||
@@ -110,6 +110,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
selectors=project.selectors,
|
||||
query_comment=project.query_comment,
|
||||
sources=project.sources,
|
||||
tests=project.tests,
|
||||
vars=project.vars,
|
||||
config_version=project.config_version,
|
||||
unrendered=project.unrendered,
|
||||
@@ -174,7 +175,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
:raises DbtProjectError: If the configuration fails validation.
|
||||
"""
|
||||
try:
|
||||
Configuration.from_dict(self.serialize())
|
||||
Configuration.validate(self.serialize())
|
||||
except ValidationError as e:
|
||||
raise DbtProjectError(validator_error_message(e)) from e
|
||||
|
||||
@@ -272,7 +273,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
return frozenset(paths)
|
||||
|
||||
def get_resource_config_paths(self) -> Dict[str, PathSet]:
|
||||
"""Return a dictionary with 'seeds' and 'models' keys whose values are
|
||||
"""Return a dictionary with resource type keys whose values are
|
||||
lists of lists of strings, where each inner list of strings represents
|
||||
a configured path in the resource.
|
||||
"""
|
||||
@@ -281,6 +282,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
||||
'seeds': self._get_config_paths(self.seeds),
|
||||
'snapshots': self._get_config_paths(self.snapshots),
|
||||
'sources': self._get_config_paths(self.sources),
|
||||
'tests': self._get_config_paths(self.tests),
|
||||
}
|
||||
|
||||
def get_unused_resource_config_paths(
|
||||
@@ -391,7 +393,7 @@ class UnsetConfig(UserConfig):
|
||||
f"'UnsetConfig' object has no attribute {name}"
|
||||
)
|
||||
|
||||
def to_dict(self):
|
||||
def __post_serialize__(self, dct):
|
||||
return {}
|
||||
|
||||
|
||||
@@ -488,6 +490,7 @@ class UnsetProfileConfig(RuntimeConfig):
|
||||
selectors=project.selectors,
|
||||
query_comment=project.query_comment,
|
||||
sources=project.sources,
|
||||
tests=project.tests,
|
||||
vars=project.vars,
|
||||
config_version=project.config_version,
|
||||
unrendered=project.unrendered,
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
import yaml
|
||||
|
||||
from hologram import ValidationError
|
||||
from dbt.clients.yaml_helper import ( # noqa: F401
|
||||
yaml, Loader, Dumper, load_yaml_text
|
||||
)
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
|
||||
from .renderer import SelectorRenderer
|
||||
|
||||
@@ -11,7 +12,6 @@ from dbt.clients.system import (
|
||||
path_exists,
|
||||
resolve_path_from_base,
|
||||
)
|
||||
from dbt.clients.yaml_helper import load_yaml_text
|
||||
from dbt.contracts.selection import SelectorFile
|
||||
from dbt.exceptions import DbtSelectorsError, RuntimeException
|
||||
from dbt.graph import parse_from_selectors_definition, SelectionSpec
|
||||
@@ -30,9 +30,11 @@ Validator Error:
|
||||
|
||||
|
||||
class SelectorConfig(Dict[str, SelectionSpec]):
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
|
||||
def selectors_from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
|
||||
try:
|
||||
SelectorFile.validate(data)
|
||||
selector_file = SelectorFile.from_dict(data)
|
||||
selectors = parse_from_selectors_definition(selector_file)
|
||||
except ValidationError as exc:
|
||||
@@ -66,7 +68,7 @@ class SelectorConfig(Dict[str, SelectionSpec]):
|
||||
f'Could not render selector data: {exc}',
|
||||
result_type='invalid_selector',
|
||||
) from exc
|
||||
return cls.from_dict(rendered)
|
||||
return cls.selectors_from_dict(rendered)
|
||||
|
||||
@classmethod
|
||||
def from_path(
|
||||
@@ -107,7 +109,7 @@ def selector_config_from_data(
|
||||
selectors_data = {'selectors': []}
|
||||
|
||||
try:
|
||||
selectors = SelectorConfig.from_dict(selectors_data)
|
||||
selectors = SelectorConfig.selectors_from_dict(selectors_data)
|
||||
except ValidationError as e:
|
||||
raise DbtSelectorsError(
|
||||
MALFORMED_SELECTOR_ERROR.format(error=str(e.message)),
|
||||
|
||||
@@ -7,13 +7,14 @@ from typing import (
|
||||
from dbt import flags
|
||||
from dbt import tracking
|
||||
from dbt.clients.jinja import undefined_error, get_rendered
|
||||
from dbt.clients import yaml_helper
|
||||
from dbt.clients.yaml_helper import ( # noqa: F401
|
||||
yaml, safe_load, SafeLoader, Loader, Dumper
|
||||
)
|
||||
from dbt.contracts.graph.compiled import CompiledResource
|
||||
from dbt.exceptions import raise_compiler_error, MacroReturn
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.version import __version__ as dbt_version
|
||||
|
||||
import yaml
|
||||
# These modules are added to the context. Consider alternative
|
||||
# approaches which will extend well to potentially many modules
|
||||
import pytz
|
||||
@@ -172,6 +173,7 @@ class BaseContext(metaclass=ContextMeta):
|
||||
builtins[key] = value
|
||||
return builtins
|
||||
|
||||
# no dbtClassMixin so this is not an actual override
|
||||
def to_dict(self):
|
||||
self._ctx['context'] = self._ctx
|
||||
builtins = self.generate_builtins()
|
||||
@@ -394,7 +396,7 @@ class BaseContext(metaclass=ContextMeta):
|
||||
-- ["good"]
|
||||
"""
|
||||
try:
|
||||
return yaml_helper.safe_load(value)
|
||||
return safe_load(value)
|
||||
except (AttributeError, ValueError, yaml.YAMLError):
|
||||
return default
|
||||
|
||||
@@ -536,4 +538,5 @@ class BaseContext(metaclass=ContextMeta):
|
||||
|
||||
def generate_base_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
|
||||
ctx = BaseContext(cli_vars)
|
||||
# This is not a Mashumaro to_dict call
|
||||
return ctx.to_dict()
|
||||
|
||||
@@ -41,6 +41,8 @@ class UnrenderedConfig(ConfigSource):
|
||||
model_configs = unrendered.get('snapshots')
|
||||
elif resource_type == NodeType.Source:
|
||||
model_configs = unrendered.get('sources')
|
||||
elif resource_type == NodeType.Test:
|
||||
model_configs = unrendered.get('tests')
|
||||
else:
|
||||
model_configs = unrendered.get('models')
|
||||
|
||||
@@ -61,6 +63,8 @@ class RenderedConfig(ConfigSource):
|
||||
model_configs = self.project.snapshots
|
||||
elif resource_type == NodeType.Source:
|
||||
model_configs = self.project.sources
|
||||
elif resource_type == NodeType.Test:
|
||||
model_configs = self.project.tests
|
||||
else:
|
||||
model_configs = self.project.models
|
||||
return model_configs
|
||||
@@ -165,7 +169,7 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
|
||||
# Calculate the defaults. We don't want to validate the defaults,
|
||||
# because it might be invalid in the case of required config members
|
||||
# (such as on snapshots!)
|
||||
result = config_cls.from_dict({}, validate=False)
|
||||
result = config_cls.from_dict({})
|
||||
return result
|
||||
|
||||
def _update_from_config(
|
||||
@@ -196,7 +200,7 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
|
||||
base=base,
|
||||
)
|
||||
finalized = config.finalize_and_validate()
|
||||
return finalized.to_dict()
|
||||
return finalized.to_dict(omit_none=True)
|
||||
|
||||
|
||||
class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]):
|
||||
|
||||
@@ -77,4 +77,5 @@ def generate_runtime_docs(
|
||||
current_project: str,
|
||||
) -> Dict[str, Any]:
|
||||
ctx = DocsRuntimeContext(config, target, manifest, current_project)
|
||||
# This is not a Mashumaro to_dict call
|
||||
return ctx.to_dict()
|
||||
|
||||
core/dbt/context/macro_resolver.py (new file, 179 lines)
@@ -0,0 +1,179 @@
|
||||
from typing import (
|
||||
Dict, MutableMapping, Optional
|
||||
)
|
||||
from dbt.contracts.graph.parsed import ParsedMacro
|
||||
from dbt.exceptions import raise_duplicate_macro_name, raise_compiler_error
|
||||
from dbt.include.global_project import PROJECT_NAME as GLOBAL_PROJECT_NAME
|
||||
from dbt.clients.jinja import MacroGenerator
|
||||
|
||||
MacroNamespace = Dict[str, ParsedMacro]
|
||||
|
||||
|
||||
# This class builds the MacroResolver by adding macros
|
||||
# to various categories for finding macros in the right order,
|
||||
# so that higher precedence macros are found first.
|
||||
# This functionality is also provided by the MacroNamespace,
|
||||
# but the intention is to eventually replace that class.
|
||||
# This enables us to get the macro unique_id without
|
||||
# processing every macro in the project.
|
||||
# Note: the root project macros override everything in the
|
||||
# dbt internal projects. External projects (dependencies) will
|
||||
# use their own macros first, then pull from the root project
|
||||
# followed by dbt internal projects.
|
||||
class MacroResolver:
|
||||
def __init__(
|
||||
self,
|
||||
macros: MutableMapping[str, ParsedMacro],
|
||||
root_project_name: str,
|
||||
internal_package_names,
|
||||
) -> None:
|
||||
self.root_project_name = root_project_name
|
||||
self.macros = macros
|
||||
# internal packages comes from get_adapter_package_names
|
||||
self.internal_package_names = internal_package_names
|
||||
|
||||
# To be filled in from macros.
|
||||
self.internal_packages: Dict[str, MacroNamespace] = {}
|
||||
self.packages: Dict[str, MacroNamespace] = {}
|
||||
self.root_package_macros: MacroNamespace = {}
|
||||
|
||||
# add the macros to internal_packages, packages, and root packages
|
||||
self.add_macros()
|
||||
self._build_internal_packages_namespace()
|
||||
self._build_macros_by_name()
|
||||
|
||||
def _build_internal_packages_namespace(self):
|
||||
# Iterate in reverse-order and overwrite: the packages that are first
|
||||
# in the list are the ones we want to "win".
|
||||
self.internal_packages_namespace: MacroNamespace = {}
|
||||
for pkg in reversed(self.internal_package_names):
|
||||
if pkg in self.internal_packages:
|
||||
# Turn the internal packages into a flat namespace
|
||||
self.internal_packages_namespace.update(
|
||||
self.internal_packages[pkg])
|
||||
|
||||
# search order:
|
||||
# local_namespace (package of particular node), not including
|
||||
# the internal packages or the root package
|
||||
# This means that within an extra package, it uses its own macros
|
||||
# root package namespace
|
||||
# non-internal packages (that aren't local or root)
|
||||
# dbt internal packages
|
||||
def _build_macros_by_name(self):
|
||||
macros_by_name = {}
|
||||
|
||||
# all internal packages (already in the right order)
|
||||
for macro in self.internal_packages_namespace.values():
|
||||
macros_by_name[macro.name] = macro
|
||||
|
||||
# non-internal packages
|
||||
for fnamespace in self.packages.values():
|
||||
for macro in fnamespace.values():
|
||||
macros_by_name[macro.name] = macro
|
||||
|
||||
# root package macros
|
||||
for macro in self.root_package_macros.values():
|
||||
macros_by_name[macro.name] = macro
|
||||
|
||||
self.macros_by_name = macros_by_name
|
||||
|
||||
def _add_macro_to(
|
||||
self,
|
||||
package_namespaces: Dict[str, MacroNamespace],
|
||||
macro: ParsedMacro,
|
||||
):
|
||||
if macro.package_name in package_namespaces:
|
||||
namespace = package_namespaces[macro.package_name]
|
||||
else:
|
||||
namespace = {}
|
||||
package_namespaces[macro.package_name] = namespace
|
||||
|
||||
if macro.name in namespace:
|
||||
raise_duplicate_macro_name(
|
||||
macro, macro, macro.package_name
|
||||
)
|
||||
package_namespaces[macro.package_name][macro.name] = macro
|
||||
|
||||
def add_macro(self, macro: ParsedMacro):
|
||||
macro_name: str = macro.name
|
||||
|
||||
# internal macros (from plugins) will be processed separately from
|
||||
# project macros, so store them in a different place
|
||||
if macro.package_name in self.internal_package_names:
|
||||
self._add_macro_to(self.internal_packages, macro)
|
||||
else:
|
||||
# if it's not an internal package
|
||||
self._add_macro_to(self.packages, macro)
|
||||
# add to root_package_macros if it's in the root package
|
||||
if macro.package_name == self.root_project_name:
|
||||
self.root_package_macros[macro_name] = macro
|
||||
|
||||
def add_macros(self):
|
||||
for macro in self.macros.values():
|
||||
self.add_macro(macro)
|
||||
|
||||
def get_macro(self, local_package, macro_name):
|
||||
local_package_macros = {}
|
||||
if (local_package not in self.internal_package_names and
|
||||
local_package in self.packages):
|
||||
local_package_macros = self.packages[local_package]
|
||||
# First: search the local packages for this macro
|
||||
if macro_name in local_package_macros:
|
||||
return local_package_macros[macro_name]
|
||||
# Now look up in the standard search order
|
||||
if macro_name in self.macros_by_name:
|
||||
return self.macros_by_name[macro_name]
|
||||
return None
|
||||
|
||||
def get_macro_id(self, local_package, macro_name):
|
||||
macro = self.get_macro(local_package, macro_name)
|
||||
if macro is None:
|
||||
return None
|
||||
else:
|
||||
return macro.unique_id
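For orientation, a toy dictionary-based sketch of the precedence the comments above describe for MacroResolver.get_macro: the node's own package wins, then the flattened table in which internal macros were written first and root-project macros last, so later writes override earlier ones. All package and macro names below are invented.

```python
def build_macros_by_name(internal: dict, packages: dict, root: dict) -> dict:
    macros_by_name = {}
    macros_by_name.update(internal)        # lowest precedence: dbt/plugin macros
    for namespace in packages.values():
        macros_by_name.update(namespace)   # non-internal packages
    macros_by_name.update(root)            # highest precedence: root project
    return macros_by_name


def get_macro(local_package: str, name: str, packages: dict, macros_by_name: dict):
    # First search the node's own package, then the flattened table.
    local = packages.get(local_package, {})
    if name in local:
        return local[name]
    return macros_by_name.get(name)


internal = {'current_timestamp': 'dbt.current_timestamp'}
packages = {'dbt_utils': {'surrogate_key': 'dbt_utils.surrogate_key'}}
root = {'current_timestamp': 'my_project.current_timestamp'}

table = build_macros_by_name(internal, packages, root)
print(get_macro('dbt_utils', 'surrogate_key', packages, table))
# -> dbt_utils.surrogate_key
print(get_macro('my_project', 'current_timestamp', packages, table))
# -> my_project.current_timestamp (root project overrides the internal macro)
```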
|
||||
|
||||
|
||||
# Currently this is just used by test processing in the schema
|
||||
# parser (in connection with the MacroResolver). Future work
|
||||
# will extend the use of these classes to other parsing areas.
|
||||
# One of the features of this class compared to the MacroNamespace
|
||||
# is that you can limit the number of macros provided to the
|
||||
# context dictionary in the 'to_dict' manifest method.
|
||||
class TestMacroNamespace:
|
||||
def __init__(
|
||||
self, macro_resolver, ctx, node, thread_ctx, depends_on_macros
|
||||
):
|
||||
self.macro_resolver = macro_resolver
|
||||
self.ctx = ctx
|
||||
self.node = node
|
||||
self.thread_ctx = thread_ctx
|
||||
local_namespace = {}
|
||||
if depends_on_macros:
|
||||
for macro_unique_id in depends_on_macros:
|
||||
if macro_unique_id in self.macro_resolver.macros:
|
||||
macro = self.macro_resolver.macros[macro_unique_id]
|
||||
local_namespace[macro.name] = MacroGenerator(
|
||||
macro, self.ctx, self.node, self.thread_ctx,
|
||||
)
|
||||
self.local_namespace = local_namespace
|
||||
|
||||
def get_from_package(
|
||||
self, package_name: Optional[str], name: str
|
||||
) -> Optional[MacroGenerator]:
|
||||
macro = None
|
||||
if package_name is None:
|
||||
macro = self.macro_resolver.macros_by_name.get(name)
|
||||
elif package_name == GLOBAL_PROJECT_NAME:
|
||||
macro = self.macro_resolver.internal_packages_namespace.get(name)
|
||||
elif package_name in self.macro_resolver.packages:
|
||||
macro = self.macro_resolver.packages[package_name].get(name)
|
||||
else:
|
||||
raise_compiler_error(
|
||||
f"Could not find package '{package_name}'"
|
||||
)
|
||||
if not macro:
|
||||
return None
|
||||
macro_func = MacroGenerator(
|
||||
macro, self.ctx, self.node, self.thread_ctx
|
||||
)
|
||||
return macro_func
|
||||
@@ -15,13 +15,21 @@ NamespaceMember = Union[FlatNamespace, MacroGenerator]
|
||||
FullNamespace = Dict[str, NamespaceMember]
|
||||
|
||||
|
||||
# The point of this class is to collect the various macros
|
||||
# and provide the ability to flatten them into the ManifestContexts
|
||||
# that are created for jinja, so that macro calls can be resolved.
|
||||
# Creates special iterators and _keys methods to flatten the lists.
|
||||
# When this class is created it has a static 'local_namespace' which
|
||||
# depends on the package of the node, so it only works for one
|
||||
# particular local package at a time for "flattening" into a context.
|
||||
# 'get_by_package' should work for any macro.
|
||||
class MacroNamespace(Mapping):
|
||||
def __init__(
|
||||
self,
|
||||
global_namespace: FlatNamespace,
|
||||
local_namespace: FlatNamespace,
|
||||
global_project_namespace: FlatNamespace,
|
||||
packages: Dict[str, FlatNamespace],
|
||||
global_namespace: FlatNamespace, # root package macros
|
||||
local_namespace: FlatNamespace, # packages for *this* node
|
||||
global_project_namespace: FlatNamespace, # internal packages
|
||||
packages: Dict[str, FlatNamespace], # non-internal packages
|
||||
):
|
||||
self.global_namespace: FlatNamespace = global_namespace
|
||||
self.local_namespace: FlatNamespace = local_namespace
|
||||
@@ -29,20 +37,24 @@ class MacroNamespace(Mapping):
|
||||
self.global_project_namespace: FlatNamespace = global_project_namespace
|
||||
|
||||
def _search_order(self) -> Iterable[Union[FullNamespace, FlatNamespace]]:
|
||||
yield self.local_namespace
|
||||
yield self.global_namespace
|
||||
yield self.packages
|
||||
yield self.local_namespace # local package
|
||||
yield self.global_namespace # root package
|
||||
yield self.packages # non-internal packages
|
||||
yield {
|
||||
GLOBAL_PROJECT_NAME: self.global_project_namespace,
|
||||
GLOBAL_PROJECT_NAME: self.global_project_namespace, # dbt
|
||||
}
|
||||
yield self.global_project_namespace
|
||||
yield self.global_project_namespace # other internal project besides dbt
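Illustrative only: the flattened lookup order that _search_order yields can be pictured as a ChainMap where the first mapping wins. The 'dbt' key stands in for GLOBAL_PROJECT_NAME, and every macro name here is invented.

```python
from collections import ChainMap

local_namespace = {'my_macro': 'local.my_macro'}
global_namespace = {
    'my_macro': 'root.my_macro',
    'generate_schema_name': 'root.generate_schema_name',
}
packages = {'dbt_utils': {'surrogate_key': 'dbt_utils.surrogate_key'}}
global_project_namespace = {'generate_schema_name': 'dbt.generate_schema_name'}

lookup = ChainMap(
    local_namespace,              # package of this node
    global_namespace,             # root package
    packages,                     # non-internal packages, addressed by name
    {'dbt': global_project_namespace},
    global_project_namespace,     # internal packages, flattened
)

print(lookup['my_macro'])                     # local.my_macro
print(lookup['generate_schema_name'])         # root.generate_schema_name
print(lookup['dbt']['generate_schema_name'])  # dbt.generate_schema_name
```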
|
||||
|
||||
# provides special keys method for MacroNamespace iterator
|
||||
# returns keys from local_namespace, global_namespace, packages,
|
||||
# global_project_namespace
|
||||
def _keys(self) -> Set[str]:
|
||||
keys: Set[str] = set()
|
||||
for search in self._search_order():
|
||||
keys.update(search)
|
||||
return keys
|
||||
|
||||
# special iterator using special keys
|
||||
def __iter__(self) -> Iterator[str]:
|
||||
for key in self._keys():
|
||||
yield key
|
||||
@@ -72,6 +84,10 @@ class MacroNamespace(Mapping):
|
||||
)
|
||||
|
||||
|
||||
# This class builds the MacroNamespace by adding macros to
|
||||
# internal_packages or packages, and locals/globals.
|
||||
# Call 'build_namespace' to return a MacroNamespace.
|
||||
# This is used by ManifestContext (and subclasses)
|
||||
class MacroNamespaceBuilder:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -83,10 +99,17 @@ class MacroNamespaceBuilder:
|
||||
) -> None:
|
||||
self.root_package = root_package
|
||||
self.search_package = search_package
|
||||
# internal packages comes from get_adapter_package_names
|
||||
self.internal_package_names = set(internal_packages)
|
||||
self.internal_package_names_order = internal_packages
|
||||
# macro_func is added here if in root package, since
|
||||
# the root package acts as a "global" namespace, overriding
|
||||
# everything else except local external package macro calls
|
||||
self.globals: FlatNamespace = {}
|
||||
# macro_func is added here if it's the package for this node
|
||||
self.locals: FlatNamespace = {}
|
||||
# Create a dictionary of [package name][macro name] =
|
||||
# MacroGenerator object which acts like a function
|
||||
self.internal_packages: Dict[str, FlatNamespace] = {}
|
||||
self.packages: Dict[str, FlatNamespace] = {}
|
||||
self.thread_ctx = thread_ctx
|
||||
@@ -94,25 +117,28 @@ class MacroNamespaceBuilder:
|
||||
|
||||
def _add_macro_to(
|
||||
self,
|
||||
heirarchy: Dict[str, FlatNamespace],
|
||||
hierarchy: Dict[str, FlatNamespace],
|
||||
macro: ParsedMacro,
|
||||
macro_func: MacroGenerator,
|
||||
):
|
||||
if macro.package_name in heirarchy:
|
||||
namespace = heirarchy[macro.package_name]
|
||||
if macro.package_name in hierarchy:
|
||||
namespace = hierarchy[macro.package_name]
|
||||
else:
|
||||
namespace = {}
|
||||
heirarchy[macro.package_name] = namespace
|
||||
hierarchy[macro.package_name] = namespace
|
||||
|
||||
if macro.name in namespace:
|
||||
raise_duplicate_macro_name(
|
||||
macro_func.macro, macro, macro.package_name
|
||||
)
|
||||
heirarchy[macro.package_name][macro.name] = macro_func
|
||||
hierarchy[macro.package_name][macro.name] = macro_func
|
||||
|
||||
def add_macro(self, macro: ParsedMacro, ctx: Dict[str, Any]):
|
||||
macro_name: str = macro.name
|
||||
|
||||
# MacroGenerator is in clients/jinja.py
|
||||
# a MacroGenerator object is a callable object that will
|
||||
# execute the MacroGenerator.__call__ function
|
||||
macro_func: MacroGenerator = MacroGenerator(
|
||||
macro, ctx, self.node, self.thread_ctx
|
||||
)
|
||||
@@ -122,10 +148,12 @@ class MacroNamespaceBuilder:
|
||||
if macro.package_name in self.internal_package_names:
|
||||
self._add_macro_to(self.internal_packages, macro, macro_func)
|
||||
else:
|
||||
# if it's not an internal package
|
||||
self._add_macro_to(self.packages, macro, macro_func)
|
||||
|
||||
# add to locals if it's the package this node is in
|
||||
if macro.package_name == self.search_package:
|
||||
self.locals[macro_name] = macro_func
|
||||
# add to globals if it's in the root package
|
||||
elif macro.package_name == self.root_package:
|
||||
self.globals[macro_name] = macro_func
|
||||
|
||||
@@ -143,11 +171,12 @@ class MacroNamespaceBuilder:
|
||||
global_project_namespace: FlatNamespace = {}
|
||||
for pkg in reversed(self.internal_package_names_order):
|
||||
if pkg in self.internal_packages:
|
||||
# add the macros pointed to by this package name
|
||||
global_project_namespace.update(self.internal_packages[pkg])
|
||||
|
||||
return MacroNamespace(
|
||||
global_namespace=self.globals,
|
||||
local_namespace=self.locals,
|
||||
global_project_namespace=global_project_namespace,
|
||||
packages=self.packages,
|
||||
global_namespace=self.globals, # root package macros
|
||||
local_namespace=self.locals, # packages for *this* node
|
||||
global_project_namespace=global_project_namespace, # internal packages
|
||||
packages=self.packages, # non internal_packages
|
||||
)
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import List
|
||||
from dbt.clients.jinja import MacroStack
|
||||
from dbt.contracts.connection import AdapterRequiredConfig
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.context.macro_resolver import TestMacroNamespace
|
||||
|
||||
|
||||
from .configured import ConfiguredContext
|
||||
@@ -24,12 +25,20 @@ class ManifestContext(ConfiguredContext):
|
||||
) -> None:
|
||||
super().__init__(config)
|
||||
self.manifest = manifest
|
||||
# this is the package of the node for which this context was built
|
||||
self.search_package = search_package
|
||||
self.macro_stack = MacroStack()
|
||||
# This namespace is used by the BaseDatabaseWrapper in jinja rendering.
|
||||
# The namespace is passed to it when it's constructed. It expects
|
||||
# to be able to do: namespace.get_from_package(..)
|
||||
self.namespace = self._build_namespace()
|
||||
|
||||
def _build_namespace(self):
|
||||
# this takes all the macros in the manifest and adds them
|
||||
# to the MacroNamespaceBuilder stored in self.namespace
|
||||
builder = self._get_namespace_builder()
|
||||
self.namespace = builder.build_namespace(
|
||||
self.manifest.macros.values(),
|
||||
self._ctx,
|
||||
return builder.build_namespace(
|
||||
self.manifest.macros.values(), self._ctx
|
||||
)
|
||||
|
||||
def _get_namespace_builder(self) -> MacroNamespaceBuilder:
|
||||
@@ -46,9 +55,15 @@ class ManifestContext(ConfiguredContext):
|
||||
None,
|
||||
)
|
||||
|
||||
# This does not use the Mashumaro code
|
||||
def to_dict(self):
|
||||
dct = super().to_dict()
|
||||
dct.update(self.namespace)
|
||||
# This moves all of the macros in the 'namespace' into top level
|
||||
# keys in the manifest dictionary
|
||||
if isinstance(self.namespace, TestMacroNamespace):
|
||||
dct.update(self.namespace.local_namespace)
|
||||
else:
|
||||
dct.update(self.namespace)
|
||||
return dct
|
||||
|
||||
|
||||
|
||||
@@ -8,16 +8,22 @@ from typing_extensions import Protocol
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt.adapters.base.column import Column
|
||||
from dbt.adapters.factory import get_adapter, get_adapter_package_names
|
||||
from dbt.adapters.factory import (
|
||||
get_adapter, get_adapter_package_names, get_adapter_type_names
|
||||
)
|
||||
from dbt.clients import agate_helper
|
||||
from dbt.clients.jinja import get_rendered, MacroGenerator
|
||||
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack
|
||||
from dbt.config import RuntimeConfig, Project
|
||||
from .base import contextmember, contextproperty, Var
|
||||
from .configured import FQNLookup
|
||||
from .context_config import ContextConfig
|
||||
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
|
||||
from .macros import MacroNamespaceBuilder, MacroNamespace
|
||||
from .manifest import ManifestContext
|
||||
from dbt.contracts.graph.manifest import Manifest, Disabled
|
||||
from dbt.contracts.connection import AdapterResponse
|
||||
from dbt.contracts.graph.manifest import (
|
||||
Manifest, Disabled
|
||||
)
|
||||
from dbt.contracts.graph.compiled import (
|
||||
CompiledResource,
|
||||
CompiledSeedNode,
|
||||
@@ -83,6 +89,7 @@ class BaseDatabaseWrapper:
|
||||
Wrapper for runtime database interaction. Applies the runtime quote policy
|
||||
via a relation proxy.
|
||||
"""
|
||||
|
||||
def __init__(self, adapter, namespace: MacroNamespace):
|
||||
self._adapter = adapter
|
||||
self.Relation = RelationProxy(adapter)
|
||||
@@ -102,10 +109,11 @@ class BaseDatabaseWrapper:
|
||||
return self._adapter.commit_if_has_connection()
|
||||
|
||||
def _get_adapter_macro_prefixes(self) -> List[str]:
|
||||
# a future version of this could have plugins automatically call fall
|
||||
# back to their dependencies' dependencies by using
|
||||
# `get_adapter_type_names` instead of `[self.config.credentials.type]`
|
||||
search_prefixes = [self._adapter.type(), 'default']
|
||||
# order matters for dispatch:
|
||||
# 1. current adapter
|
||||
# 2. any parent adapters (dependencies)
|
||||
# 3. 'default'
|
||||
search_prefixes = get_adapter_type_names(self.config.credentials.type) + ['default']
|
||||
return search_prefixes
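A rough sketch of how a dispatch lookup walks the prefixes returned by _get_adapter_macro_prefixes: most specific adapter first, then any parent adapters, then 'default'. The macro and adapter names below are invented, and the `dispatch` helper is a simplification of the real method.

```python
def dispatch(macro_name: str, search_prefixes: list, available_macros: dict):
    # Try '<prefix>__<macro_name>' for each prefix in order; first hit wins.
    for prefix in search_prefixes:
        search_name = f'{prefix}__{macro_name}'
        if search_name in available_macros:
            return available_macros[search_name]
    raise KeyError(f'No implementation found for {macro_name}')


macros = {
    'default__get_columns': 'default implementation',
    'postgres__get_columns': 'postgres implementation',
}

# e.g. get_adapter_type_names('redshift') might yield ['redshift', 'postgres']
print(dispatch('get_columns', ['redshift', 'postgres', 'default'], macros))
# -> postgres implementation
```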
|
||||
|
||||
def dispatch(
|
||||
@@ -139,6 +147,7 @@ class BaseDatabaseWrapper:
|
||||
for prefix in self._get_adapter_macro_prefixes():
|
||||
search_name = f'{prefix}__{macro_name}'
|
||||
try:
|
||||
# this uses the namespace from the context
|
||||
macro = self._namespace.get_from_package(
|
||||
package_name, search_name
|
||||
)
|
||||
@@ -379,6 +388,7 @@ class ParseDatabaseWrapper(BaseDatabaseWrapper):
|
||||
"""The parser subclass of the database wrapper applies any explicit
|
||||
parse-time overrides.
|
||||
"""
|
||||
|
||||
def __getattr__(self, name):
|
||||
override = (name in self._adapter._available_ and
|
||||
name in self._adapter._parse_replacements_)
|
||||
@@ -399,6 +409,7 @@ class RuntimeDatabaseWrapper(BaseDatabaseWrapper):
|
||||
"""The runtime database wrapper exposes everything the adapter marks
|
||||
available.
|
||||
"""
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name in self._adapter._available_:
|
||||
return getattr(self._adapter, name)
|
||||
@@ -634,10 +645,13 @@ class ProviderContext(ManifestContext):
|
||||
self.context_config: Optional[ContextConfig] = context_config
|
||||
self.provider: Provider = provider
|
||||
self.adapter = get_adapter(self.config)
|
||||
# The macro namespace is used in creating the DatabaseWrapper
|
||||
self.db_wrapper = self.provider.DatabaseWrapper(
|
||||
self.adapter, self.namespace
|
||||
)
|
||||
|
||||
# This overrides the method in ManifestContext, and provides
|
||||
# a model, which the ManifestContext builder does not
|
||||
def _get_namespace_builder(self):
|
||||
internal_packages = get_adapter_package_names(
|
||||
self.config.credentials.type
|
||||
@@ -660,18 +674,33 @@ class ProviderContext(ManifestContext):
|
||||
|
||||
@contextmember
|
||||
def store_result(
|
||||
self, name: str, status: Any, agate_table: Optional[agate.Table] = None
|
||||
self, name: str,
|
||||
response: Any,
|
||||
agate_table: Optional[agate.Table] = None
|
||||
) -> str:
|
||||
if agate_table is None:
|
||||
agate_table = agate_helper.empty_table()
|
||||
|
||||
self.sql_results[name] = AttrDict({
|
||||
'status': status,
|
||||
'response': response,
|
||||
'data': agate_helper.as_matrix(agate_table),
|
||||
'table': agate_table
|
||||
})
|
||||
return ''
|
||||
|
||||
@contextmember
|
||||
def store_raw_result(
|
||||
self,
|
||||
name: str,
|
||||
message=Optional[str],
|
||||
code=Optional[str],
|
||||
rows_affected=Optional[str],
|
||||
agate_table: Optional[agate.Table] = None
|
||||
) -> str:
|
||||
response = AdapterResponse(
|
||||
_message=message, code=code, rows_affected=rows_affected)
|
||||
return self.store_result(name, response, agate_table)
|
||||
|
||||
@contextproperty
|
||||
def validation(self):
|
||||
def validate_any(*args) -> Callable[[T], None]:
|
||||
@@ -1089,7 +1118,7 @@ class ProviderContext(ManifestContext):
|
||||
|
||||
@contextproperty('model')
|
||||
def ctx_model(self) -> Dict[str, Any]:
|
||||
return self.model.to_dict()
|
||||
return self.model.to_dict(omit_none=True)
|
||||
|
||||
@contextproperty
|
||||
def pre_hooks(self) -> Optional[List[Dict[str, Any]]]:
|
||||
@@ -1179,6 +1208,7 @@ class MacroContext(ProviderContext):
|
||||
- 'schema' does not use any 'model' information
|
||||
- they can't be configured with config() directives
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: ParsedMacro,
|
||||
@@ -1204,7 +1234,7 @@ class ModelContext(ProviderContext):
|
||||
if isinstance(self.model, ParsedSourceDefinition):
|
||||
return []
|
||||
return [
|
||||
h.to_dict() for h in self.model.config.pre_hook
|
||||
h.to_dict(omit_none=True) for h in self.model.config.pre_hook
|
||||
]
|
||||
|
||||
@contextproperty
|
||||
@@ -1212,7 +1242,7 @@ class ModelContext(ProviderContext):
|
||||
if isinstance(self.model, ParsedSourceDefinition):
|
||||
return []
|
||||
return [
|
||||
h.to_dict() for h in self.model.config.post_hook
|
||||
h.to_dict(omit_none=True) for h in self.model.config.post_hook
|
||||
]
|
||||
|
||||
@contextproperty
|
||||
@@ -1269,27 +1299,21 @@ class ModelContext(ProviderContext):
|
||||
return self.db_wrapper.Relation.create_from(self.config, self.model)
|
||||
|
||||
|
||||
# This is called by '_context_for', used in 'render_with_context'
|
||||
def generate_parser_model(
|
||||
model: ManifestNode,
|
||||
config: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
context_config: ContextConfig,
|
||||
) -> Dict[str, Any]:
|
||||
# The __init__ method of ModelContext also initializes
|
||||
# a ManifestContext object which creates a MacroNamespaceBuilder
|
||||
# which adds every macro in the Manifest.
|
||||
ctx = ModelContext(
|
||||
model, config, manifest, ParseProvider(), context_config
|
||||
)
|
||||
return ctx.to_dict()
|
||||
|
||||
|
||||
def generate_parser_macro(
|
||||
macro: ParsedMacro,
|
||||
config: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
package_name: Optional[str],
|
||||
) -> Dict[str, Any]:
|
||||
ctx = MacroContext(
|
||||
macro, config, manifest, ParseProvider(), package_name
|
||||
)
|
||||
# The 'to_dict' method in ManifestContext moves all of the macro names
|
||||
# in the macro 'namespace' up to top level keys
|
||||
return ctx.to_dict()
|
||||
|
||||
|
||||
@@ -1367,3 +1391,68 @@ def generate_parse_exposure(
|
||||
manifest,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
# This class is currently used by the schema parser in order
|
||||
# to limit the number of macros in the context by using
|
||||
# the TestMacroNamespace
|
||||
class TestContext(ProviderContext):
|
||||
def __init__(
|
||||
self,
|
||||
model,
|
||||
config: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
provider: Provider,
|
||||
context_config: Optional[ContextConfig],
|
||||
macro_resolver: MacroResolver,
|
||||
) -> None:
|
||||
# this must be before super init so that macro_resolver exists for
|
||||
# build_namespace
|
||||
self.macro_resolver = macro_resolver
|
||||
self.thread_ctx = MacroStack()
|
||||
super().__init__(model, config, manifest, provider, context_config)
|
||||
self._build_test_namespace()
|
||||
# We need to rebuild this because it's already been built by
|
||||
# the ProviderContext with the wrong namespace.
|
||||
self.db_wrapper = self.provider.DatabaseWrapper(
|
||||
self.adapter, self.namespace
|
||||
)
|
||||
|
||||
def _build_namespace(self):
|
||||
return {}
|
||||
|
||||
# this overrides _build_namespace in ManifestContext which provides a
|
||||
# complete namespace of all macros to only specify macros in the depends_on
|
||||
# This only provides a namespace with macros in the test node
|
||||
# 'depends_on.macros' by using the TestMacroNamespace
|
||||
def _build_test_namespace(self):
|
||||
depends_on_macros = []
|
||||
if self.model.depends_on and self.model.depends_on.macros:
|
||||
depends_on_macros = self.model.depends_on.macros
|
||||
lookup_macros = depends_on_macros.copy()
|
||||
for macro_unique_id in lookup_macros:
|
||||
lookup_macro = self.macro_resolver.macros.get(macro_unique_id)
|
||||
if lookup_macro:
|
||||
depends_on_macros.extend(lookup_macro.depends_on.macros)
|
||||
|
||||
macro_namespace = TestMacroNamespace(
|
||||
self.macro_resolver, self._ctx, self.model, self.thread_ctx,
|
||||
depends_on_macros
|
||||
)
|
||||
self.namespace = macro_namespace
|
||||
|
||||
|
||||
def generate_test_context(
|
||||
model: ManifestNode,
|
||||
config: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
context_config: ContextConfig,
|
||||
macro_resolver: MacroResolver
|
||||
) -> Dict[str, Any]:
|
||||
ctx = TestContext(
|
||||
model, config, manifest, ParseProvider(), context_config,
|
||||
macro_resolver
|
||||
)
|
||||
# The 'to_dict' method in ManifestContext moves all of the macro names
|
||||
# in the macro 'namespace' up to top level keys
|
||||
return ctx.to_dict()
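The idea behind TestContext and TestMacroNamespace is to expose only the macros named in the node's depends_on (plus their own macro dependencies) instead of flattening every macro in the manifest into the jinja context. A standalone sketch of that restriction; the unique ids and the dictionary shape are invented for the example.

```python
all_macros = {
    'macro.my_project.is_recent': {'name': 'is_recent',
                                   'depends_on': ['macro.dbt.dateadd']},
    'macro.dbt.dateadd': {'name': 'dateadd', 'depends_on': []},
    'macro.dbt_utils.surrogate_key': {'name': 'surrogate_key', 'depends_on': []},
}


def restricted_namespace(depends_on_macros: list, macros: dict) -> dict:
    # Walk the depends_on graph and collect only the reachable macros.
    to_add = list(depends_on_macros)
    namespace = {}
    while to_add:
        unique_id = to_add.pop()
        macro = macros.get(unique_id)
        if macro is None or macro['name'] in namespace:
            continue
        namespace[macro['name']] = unique_id
        to_add.extend(macro['depends_on'])
    return namespace


print(restricted_namespace(['macro.my_project.is_recent'], all_macros))
# -> {'is_recent': 'macro.my_project.is_recent', 'dateadd': 'macro.dbt.dateadd'}
```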
|
||||
|
||||
@@ -2,26 +2,37 @@ import abc
|
||||
import itertools
|
||||
from dataclasses import dataclass, field
|
||||
from typing import (
|
||||
Any, ClassVar, Dict, Tuple, Iterable, Optional, NewType, List, Callable,
|
||||
Any, ClassVar, Dict, Tuple, Iterable, Optional, List, Callable,
|
||||
)
|
||||
from typing_extensions import Protocol
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import (
|
||||
StrEnum, register_pattern, ExtensibleJsonSchemaMixin
|
||||
)
|
||||
|
||||
from dbt.contracts.util import Replaceable
|
||||
from dbt.exceptions import InternalException
|
||||
from dbt.utils import translate_aliases
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from typing_extensions import Protocol
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
|
||||
ValidatedStringMixin, register_pattern
|
||||
)
|
||||
from dbt.contracts.util import Replaceable
|
||||
|
||||
|
||||
Identifier = NewType('Identifier', str)
|
||||
class Identifier(ValidatedStringMixin):
|
||||
ValidationRegex = r'^[A-Za-z_][A-Za-z0-9_]+$'
|
||||
|
||||
|
||||
# we need register_pattern for jsonschema validation
|
||||
register_pattern(Identifier, r'^[A-Za-z_][A-Za-z0-9_]+$')
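The switch from a NewType to a ValidatedStringMixin subclass means Identifier values are now checked against the regex when constructed. A standalone sketch of just the validation idea, without the schema integration the real mixin provides:

```python
import re


class Identifier(str):
    # Same regex as the diff above; invalid values are rejected at construction.
    ValidationRegex = r'^[A-Za-z_][A-Za-z0-9_]+$'

    def __new__(cls, value: str) -> 'Identifier':
        if not re.match(cls.ValidationRegex, value):
            raise ValueError(f'Invalid identifier: {value!r}')
        return super().__new__(cls, value)


print(Identifier('postgres'))      # postgres
try:
    Identifier('9-not-valid')
except ValueError as exc:
    print(exc)                     # Invalid identifier: '9-not-valid'
```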
|
||||
|
||||
|
||||
@dataclass
|
||||
class AdapterResponse(dbtClassMixin):
|
||||
_message: str
|
||||
code: Optional[str] = None
|
||||
rows_affected: Optional[int] = None
|
||||
|
||||
def __str__(self):
|
||||
return self._message
|
||||
|
||||
|
||||
class ConnectionState(StrEnum):
|
||||
INIT = 'init'
|
||||
OPEN = 'open'
|
||||
@@ -30,20 +41,19 @@ class ConnectionState(StrEnum):
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class Connection(ExtensibleJsonSchemaMixin, Replaceable):
|
||||
class Connection(ExtensibleDbtClassMixin, Replaceable):
|
||||
type: Identifier
|
||||
name: Optional[str]
|
||||
name: Optional[str] = None
|
||||
state: ConnectionState = ConnectionState.INIT
|
||||
transaction_open: bool = False
|
||||
# prevent serialization
|
||||
_handle: Optional[Any] = None
|
||||
_credentials: JsonSchemaMixin = field(init=False)
|
||||
_credentials: Optional[Any] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
type: Identifier,
|
||||
name: Optional[str],
|
||||
credentials: JsonSchemaMixin,
|
||||
credentials: dbtClassMixin,
|
||||
state: ConnectionState = ConnectionState.INIT,
|
||||
transaction_open: bool = False,
|
||||
handle: Optional[Any] = None,
|
||||
@@ -85,6 +95,7 @@ class LazyHandle:
|
||||
"""Opener must be a callable that takes a Connection object and opens the
|
||||
connection, updating the handle on the Connection.
|
||||
"""
|
||||
|
||||
def __init__(self, opener: Callable[[Connection], Connection]):
|
||||
self.opener = opener
|
||||
|
||||
@@ -102,7 +113,7 @@ class LazyHandle:
|
||||
# will work.
|
||||
@dataclass # type: ignore
|
||||
class Credentials(
|
||||
ExtensibleJsonSchemaMixin,
|
||||
ExtensibleDbtClassMixin,
|
||||
Replaceable,
|
||||
metaclass=abc.ABCMeta
|
||||
):
|
||||
@@ -121,7 +132,7 @@ class Credentials(
|
||||
) -> Iterable[Tuple[str, Any]]:
|
||||
"""Return an ordered iterator of key/value pairs for pretty-printing.
|
||||
"""
|
||||
as_dict = self.to_dict(omit_none=False, with_aliases=with_aliases)
|
||||
as_dict = self.to_dict(omit_none=False)
|
||||
connection_keys = set(self._connection_keys())
|
||||
aliases: List[str] = []
|
||||
if with_aliases:
|
||||
@@ -137,9 +148,10 @@ class Credentials(
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data):
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
data = cls.translate_aliases(data)
|
||||
return super().from_dict(data)
|
||||
return data
|
||||
|
||||
@classmethod
|
||||
def translate_aliases(
|
||||
@@ -147,31 +159,26 @@ class Credentials(
|
||||
) -> Dict[str, Any]:
|
||||
return translate_aliases(kwargs, cls._ALIASES, recurse)
|
||||
|
||||
def to_dict(self, omit_none=True, validate=False, *, with_aliases=False):
|
||||
serialized = super().to_dict(omit_none=omit_none, validate=validate)
|
||||
if with_aliases:
|
||||
serialized.update({
|
||||
new_name: serialized[canonical_name]
|
||||
def __post_serialize__(self, dct):
|
||||
# no super() -- do we need it?
|
||||
if self._ALIASES:
|
||||
dct.update({
|
||||
new_name: dct[canonical_name]
|
||||
for new_name, canonical_name in self._ALIASES.items()
|
||||
if canonical_name in serialized
|
||||
if canonical_name in dct
|
||||
})
|
||||
return serialized
|
||||
return dct
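The two hooks above translate alias keys onto canonical field names when loading and add the alias keys back when dumping. A minimal standalone sketch of that round trip, assuming a made-up `ALIASES` mapping (the dbt mixin machinery and `_ALIASES` attribute are stood in by plain functions and a module-level dict):

```python
# Sketch of the alias round trip; ALIASES and the function names are stand-ins.
from typing import Any, Dict

ALIASES = {"pass": "password", "dbname": "database"}  # alias -> canonical


def pre_deserialize(data: Dict[str, Any]) -> Dict[str, Any]:
    # map alias keys onto canonical names before constructing the object
    out = dict(data)
    for alias, canonical in ALIASES.items():
        if alias in out and canonical not in out:
            out[canonical] = out.pop(alias)
    return out


def post_serialize(dct: Dict[str, Any]) -> Dict[str, Any]:
    # re-add alias keys after dumping, mirroring __post_serialize__ above
    out = dict(dct)
    for alias, canonical in ALIASES.items():
        if canonical in out:
            out[alias] = out[canonical]
    return out


print(pre_deserialize({"pass": "secret"}))    # {'password': 'secret'}
print(post_serialize({"password": "secret"}))  # {'password': 'secret', 'pass': 'secret'}
```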
|
||||
|
||||
|
||||
class UserConfigContract(Protocol):
|
||||
send_anonymous_usage_stats: bool
|
||||
use_colors: Optional[bool]
|
||||
partial_parse: Optional[bool]
|
||||
printer_width: Optional[int]
|
||||
use_colors: Optional[bool] = None
|
||||
partial_parse: Optional[bool] = None
|
||||
printer_width: Optional[int] = None
|
||||
|
||||
def set_values(self, cookie_dir: str) -> None:
|
||||
...
|
||||
|
||||
def to_dict(
|
||||
self, omit_none: bool = True, validate: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
...
|
||||
|
||||
|
||||
class HasCredentials(Protocol):
|
||||
credentials: Credentials
|
||||
@@ -205,9 +212,10 @@ DEFAULT_QUERY_COMMENT = '''
|
||||
|
||||
|
||||
@dataclass
|
||||
class QueryComment(JsonSchemaMixin):
|
||||
class QueryComment(HyphenatedDbtClassMixin):
|
||||
comment: str = DEFAULT_QUERY_COMMENT
|
||||
append: bool = False
|
||||
job_label: bool = False
|
||||
|
||||
|
||||
class AdapterRequiredConfig(HasCredentials, Protocol):
|
||||
|
||||
@@ -3,7 +3,7 @@ import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin, StrEnum
|
||||
|
||||
from dbt.exceptions import InternalException
|
||||
|
||||
@@ -14,8 +14,20 @@ MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
|
||||
MAXIMUM_SEED_SIZE_NAME = '1MB'
|
||||
|
||||
|
||||
class ParseFileType(StrEnum):
|
||||
Macro = 'macro'
|
||||
Model = 'model'
|
||||
Snapshot = 'snapshot'
|
||||
Analysis = 'analysis'
|
||||
Test = 'test'
|
||||
Seed = 'seed'
|
||||
Documentation = 'docs'
|
||||
Schema = 'schema'
|
||||
Hook = 'hook'
|
||||
|
||||
|
||||
@dataclass
|
||||
class FilePath(JsonSchemaMixin):
|
||||
class FilePath(dbtClassMixin):
|
||||
searched_path: str
|
||||
relative_path: str
|
||||
project_root: str
|
||||
@@ -51,7 +63,7 @@ class FilePath(JsonSchemaMixin):
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileHash(JsonSchemaMixin):
|
||||
class FileHash(dbtClassMixin):
|
||||
name: str # the hash type name
|
||||
checksum: str # the hashlib.hash_type().hexdigest() of the file contents
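As a reminder of what the two fields above hold in practice, here is a tiny standalone sketch using hashlib only; the helper name and return shape are illustrative and are not FileHash's real API beyond the `name`/`checksum` pair shown:

```python
import hashlib


def checksum_for(contents: bytes) -> dict:
    # mirrors the two fields above: the hash type name plus the hex digest
    return {
        "name": "sha256",
        "checksum": hashlib.sha256(contents).hexdigest(),
    }


print(checksum_for(b"select 1 as id"))
```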
|
||||
|
||||
@@ -91,7 +103,7 @@ class FileHash(JsonSchemaMixin):
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemoteFile(JsonSchemaMixin):
|
||||
class RemoteFile(dbtClassMixin):
|
||||
@property
|
||||
def searched_path(self) -> str:
|
||||
return 'from remote system'
|
||||
@@ -110,10 +122,14 @@ class RemoteFile(JsonSchemaMixin):
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceFile(JsonSchemaMixin):
|
||||
class SourceFile(dbtClassMixin):
|
||||
"""Define a source file in dbt"""
|
||||
path: Union[FilePath, RemoteFile] # the path information
|
||||
checksum: FileHash
|
||||
# Seems like knowing which project the file came from would be useful
|
||||
project_name: Optional[str] = None
|
||||
# Parse file type: i.e. which parser will process this file
|
||||
parse_file_type: Optional[ParseFileType] = None
|
||||
# we don't want to serialize this
|
||||
_contents: Optional[str] = None
|
||||
# the unique IDs contained in this file
|
||||
@@ -156,7 +172,7 @@ class SourceFile(JsonSchemaMixin):
|
||||
@classmethod
|
||||
def big_seed(cls, path: FilePath) -> 'SourceFile':
|
||||
"""Parse seeds over the size limit with just the path"""
|
||||
self = cls(path=path, checksum=FileHash.path(path.absolute_path))
|
||||
self = cls(path=path, checksum=FileHash.path(path.original_file_path))
|
||||
self.contents = ''
|
||||
return self
|
||||
|
||||
|
||||
@@ -19,19 +19,19 @@ from dbt.contracts.graph.parsed import (
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.contracts.util import Replaceable
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List, Union, Dict, Type
|
||||
|
||||
|
||||
@dataclass
|
||||
class InjectedCTE(JsonSchemaMixin, Replaceable):
|
||||
class InjectedCTE(dbtClassMixin, Replaceable):
|
||||
id: str
|
||||
sql: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class CompiledNodeMixin(JsonSchemaMixin):
|
||||
class CompiledNodeMixin(dbtClassMixin):
|
||||
# this is a special mixin class to provide a required argument. If a node
|
||||
# is missing a `compiled` flag entirely, it must not be a CompiledNode.
|
||||
compiled: bool
|
||||
@@ -178,8 +178,7 @@ def parsed_instance_for(compiled: CompiledNode) -> ParsedResource:
|
||||
raise ValueError('invalid resource_type: {}'
|
||||
.format(compiled.resource_type))
|
||||
|
||||
# validate=False to allow extra keys from compiling
|
||||
return cls.from_dict(compiled.to_dict(), validate=False)
|
||||
return cls.from_dict(compiled.to_dict(omit_none=True))
|
||||
|
||||
|
||||
NonSourceCompiledNode = Union[
|
||||
|
||||
@@ -15,19 +15,25 @@ from dbt.contracts.graph.compiled import (
|
||||
)
|
||||
from dbt.contracts.graph.parsed import (
|
||||
ParsedMacro, ParsedDocumentation, ParsedNodePatch, ParsedMacroPatch,
|
||||
ParsedSourceDefinition, ParsedExposure
|
||||
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
|
||||
UnpatchedSourceDefinition, ManifestNodes
|
||||
)
|
||||
from dbt.contracts.files import SourceFile
|
||||
from dbt.contracts.graph.unparsed import SourcePatch
|
||||
from dbt.contracts.files import SourceFile, FileHash, RemoteFile
|
||||
from dbt.contracts.util import (
|
||||
BaseArtifactMetadata, MacroKey, SourceKey, ArtifactMixin, schema_version
|
||||
)
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
from dbt.exceptions import (
|
||||
InternalException, CompilationException,
|
||||
raise_duplicate_resource_name, raise_compiler_error, warn_or_error,
|
||||
raise_invalid_patch,
|
||||
raise_invalid_patch, raise_duplicate_patch_name,
|
||||
raise_duplicate_macro_patch_name, raise_duplicate_source_patch_name,
|
||||
)
|
||||
from dbt.helper_types import PathSet
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.ui import line_wrap_message
|
||||
from dbt import deprecations
|
||||
from dbt import flags
|
||||
from dbt import tracking
|
||||
@@ -115,7 +121,8 @@ class SourceCache(PackageAwareCache[SourceKey, ParsedSourceDefinition]):
|
||||
|
||||
def populate(self):
|
||||
for source in self._manifest.sources.values():
|
||||
self.add_source(source)
|
||||
if hasattr(source, 'source_name'):
|
||||
self.add_source(source)
|
||||
|
||||
def perform_lookup(
|
||||
self, unique_id: UniqueID
|
||||
@@ -234,12 +241,13 @@ def build_edges(nodes: List[ManifestNode]):
|
||||
for node in nodes:
|
||||
backward_edges[node.unique_id] = node.depends_on_nodes[:]
|
||||
for unique_id in node.depends_on_nodes:
|
||||
forward_edges[unique_id].append(node.unique_id)
|
||||
if unique_id in forward_edges.keys():
|
||||
forward_edges[unique_id].append(node.unique_id)
|
||||
return _sort_values(forward_edges), _sort_values(backward_edges)
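The guarded append above only records a forward edge when the upstream id is itself a key in the map. A simplified, self-contained version of the same construction (plain dicts instead of parsed nodes, and without `_sort_values`):

```python
from typing import Dict, List


def build_edges(nodes: List[dict]):
    # forward_edges: node id -> ids that depend on it
    # backward_edges: node id -> ids it depends on
    forward_edges: Dict[str, List[str]] = {n["unique_id"]: [] for n in nodes}
    backward_edges: Dict[str, List[str]] = {}
    for node in nodes:
        backward_edges[node["unique_id"]] = list(node["depends_on"])
        for upstream_id in node["depends_on"]:
            if upstream_id in forward_edges:  # skip ids outside this node set
                forward_edges[upstream_id].append(node["unique_id"])
    return forward_edges, backward_edges


nodes = [
    {"unique_id": "model.a", "depends_on": []},
    {"unique_id": "model.b", "depends_on": ["model.a", "source.raw"]},
]
fwd, bwd = build_edges(nodes)
print(fwd)  # {'model.a': ['model.b'], 'model.b': []}
print(bwd)  # {'model.a': [], 'model.b': ['model.a', 'source.raw']}
```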
|
||||
|
||||
|
||||
def _deepcopy(value):
|
||||
return value.from_dict(value.to_dict())
|
||||
return value.from_dict(value.to_dict(omit_none=True))
|
||||
|
||||
|
||||
class Locality(enum.IntEnum):
|
||||
@@ -427,158 +435,13 @@ def _update_into(dest: MutableMapping[str, T], new_item: T):
|
||||
dest[unique_id] = new_item
|
||||
|
||||
|
||||
@dataclass
|
||||
class Manifest:
|
||||
"""The manifest for the full graph, after parsing and during compilation.
|
||||
"""
|
||||
# These attributes are both positional and by keyword. If an attribute
|
||||
# is added it must all be added in the __reduce_ex__ method in the
|
||||
# args tuple in the right position.
|
||||
nodes: MutableMapping[str, ManifestNode]
|
||||
sources: MutableMapping[str, ParsedSourceDefinition]
|
||||
macros: MutableMapping[str, ParsedMacro]
|
||||
docs: MutableMapping[str, ParsedDocumentation]
|
||||
exposures: MutableMapping[str, ParsedExposure]
|
||||
selectors: MutableMapping[str, Any]
|
||||
disabled: List[CompileResultNode]
|
||||
files: MutableMapping[str, SourceFile]
|
||||
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
|
||||
flat_graph: Dict[str, Any] = field(default_factory=dict)
|
||||
_docs_cache: Optional[DocCache] = None
|
||||
_sources_cache: Optional[SourceCache] = None
|
||||
_refs_cache: Optional[RefableCache] = None
|
||||
_lock: Lock = field(default_factory=flags.MP_CONTEXT.Lock)
|
||||
|
||||
@classmethod
|
||||
def from_macros(
|
||||
cls,
|
||||
macros: Optional[MutableMapping[str, ParsedMacro]] = None,
|
||||
files: Optional[MutableMapping[str, SourceFile]] = None,
|
||||
) -> 'Manifest':
|
||||
if macros is None:
|
||||
macros = {}
|
||||
if files is None:
|
||||
files = {}
|
||||
return cls(
|
||||
nodes={},
|
||||
sources={},
|
||||
macros=macros,
|
||||
docs={},
|
||||
exposures={},
|
||||
selectors={},
|
||||
disabled=[],
|
||||
files=files,
|
||||
)
|
||||
|
||||
def sync_update_node(
|
||||
self, new_node: NonSourceCompiledNode
|
||||
) -> NonSourceCompiledNode:
|
||||
"""update the node with a lock. The only time we should want to lock is
|
||||
when compiling an ephemeral ancestor of a node at runtime, because
|
||||
multiple threads could be just-in-time compiling the same ephemeral
|
||||
dependency, and we want them to have a consistent view of the manifest.
|
||||
|
||||
If the existing node is not compiled, update it with the new node and
|
||||
return that. If the existing node is compiled, do not update the
|
||||
manifest and return the existing node.
|
||||
"""
|
||||
with self._lock:
|
||||
existing = self.nodes[new_node.unique_id]
|
||||
if getattr(existing, 'compiled', False):
|
||||
# already compiled -> must be a NonSourceCompiledNode
|
||||
return cast(NonSourceCompiledNode, existing)
|
||||
_update_into(self.nodes, new_node)
|
||||
return new_node
|
||||
|
||||
def update_exposure(self, new_exposure: ParsedExposure):
|
||||
_update_into(self.exposures, new_exposure)
|
||||
|
||||
def update_node(self, new_node: ManifestNode):
|
||||
_update_into(self.nodes, new_node)
|
||||
|
||||
def update_source(self, new_source: ParsedSourceDefinition):
|
||||
_update_into(self.sources, new_source)
|
||||
|
||||
def build_flat_graph(self):
|
||||
"""This attribute is used in context.common by each node, so we want to
|
||||
only build it once and avoid any concurrency issues around it.
|
||||
Make sure you don't call this until you're done with building your
|
||||
manifest!
|
||||
"""
|
||||
self.flat_graph = {
|
||||
'nodes': {
|
||||
k: v.to_dict(omit_none=False) for k, v in self.nodes.items()
|
||||
},
|
||||
'sources': {
|
||||
k: v.to_dict(omit_none=False) for k, v in self.sources.items()
|
||||
}
|
||||
}
|
||||
|
||||
def find_disabled_by_name(
|
||||
self, name: str, package: Optional[str] = None
|
||||
) -> Optional[ManifestNode]:
|
||||
searcher: NameSearcher = NameSearcher(
|
||||
name, package, NodeType.refable()
|
||||
)
|
||||
result = searcher.search(self.disabled)
|
||||
return result
|
||||
|
||||
def find_disabled_source_by_name(
|
||||
self, source_name: str, table_name: str, package: Optional[str] = None
|
||||
) -> Optional[ParsedSourceDefinition]:
|
||||
search_name = f'{source_name}.{table_name}'
|
||||
searcher: NameSearcher = NameSearcher(
|
||||
search_name, package, [NodeType.Source]
|
||||
)
|
||||
result = searcher.search(self.disabled)
|
||||
if result is not None:
|
||||
assert isinstance(result, ParsedSourceDefinition)
|
||||
return result
|
||||
|
||||
def _find_macros_by_name(
|
||||
self,
|
||||
name: str,
|
||||
root_project_name: str,
|
||||
filter: Optional[Callable[[MacroCandidate], bool]] = None
|
||||
) -> CandidateList:
|
||||
"""Find macros by their name.
|
||||
"""
|
||||
# avoid an import cycle
|
||||
from dbt.adapters.factory import get_adapter_package_names
|
||||
candidates: CandidateList = CandidateList()
|
||||
packages = set(get_adapter_package_names(self.metadata.adapter_type))
|
||||
for unique_id, macro in self.macros.items():
|
||||
if macro.name != name:
|
||||
continue
|
||||
candidate = MacroCandidate(
|
||||
locality=_get_locality(macro, root_project_name, packages),
|
||||
macro=macro,
|
||||
)
|
||||
if filter is None or filter(candidate):
|
||||
candidates.append(candidate)
|
||||
|
||||
return candidates
|
||||
|
||||
def _materialization_candidates_for(
|
||||
self, project_name: str,
|
||||
materialization_name: str,
|
||||
adapter_type: Optional[str],
|
||||
) -> CandidateList:
|
||||
|
||||
if adapter_type is None:
|
||||
specificity = Specificity.Default
|
||||
else:
|
||||
specificity = Specificity.Adapter
|
||||
|
||||
full_name = dbt.utils.get_materialization_macro_name(
|
||||
materialization_name=materialization_name,
|
||||
adapter_type=adapter_type,
|
||||
with_prefix=False,
|
||||
)
|
||||
return CandidateList(
|
||||
MaterializationCandidate.from_macro(m, specificity)
|
||||
for m in self._find_macros_by_name(full_name, project_name)
|
||||
)
|
||||
# This contains macro methods that are in both the Manifest
|
||||
# and the MacroManifest
|
||||
class MacroMethods:
|
||||
# Just to make mypy happy. There must be a better way.
|
||||
def __init__(self):
|
||||
self.macros = []
|
||||
self.metadata = {}
|
||||
|
||||
def find_macro_by_name(
|
||||
self, name: str, root_project_name: str, package: Optional[str]
|
||||
@@ -624,6 +487,159 @@ class Manifest:
|
||||
)
|
||||
return candidates.last()
|
||||
|
||||
def _find_macros_by_name(
|
||||
self,
|
||||
name: str,
|
||||
root_project_name: str,
|
||||
filter: Optional[Callable[[MacroCandidate], bool]] = None
|
||||
) -> CandidateList:
|
||||
"""Find macros by their name.
|
||||
"""
|
||||
# avoid an import cycle
|
||||
from dbt.adapters.factory import get_adapter_package_names
|
||||
candidates: CandidateList = CandidateList()
|
||||
packages = set(get_adapter_package_names(self.metadata.adapter_type))
|
||||
for unique_id, macro in self.macros.items():
|
||||
if macro.name != name:
|
||||
continue
|
||||
candidate = MacroCandidate(
|
||||
locality=_get_locality(macro, root_project_name, packages),
|
||||
macro=macro,
|
||||
)
|
||||
if filter is None or filter(candidate):
|
||||
candidates.append(candidate)
|
||||
|
||||
return candidates
|
||||
|
||||
|
||||
@dataclass
|
||||
class ManifestStateCheck(dbtClassMixin):
|
||||
vars_hash: FileHash
|
||||
profile_hash: FileHash
|
||||
project_hashes: MutableMapping[str, FileHash]
|
||||
|
||||
|
||||
@dataclass
|
||||
class Manifest(MacroMethods):
|
||||
"""The manifest for the full graph, after parsing and during compilation.
|
||||
"""
|
||||
# These attributes are both positional and by keyword. If an attribute
|
||||
# is added it must all be added in the __reduce_ex__ method in the
|
||||
# args tuple in the right position.
|
||||
nodes: MutableMapping[str, ManifestNode]
|
||||
sources: MutableMapping[str, ParsedSourceDefinition]
|
||||
macros: MutableMapping[str, ParsedMacro]
|
||||
docs: MutableMapping[str, ParsedDocumentation]
|
||||
exposures: MutableMapping[str, ParsedExposure]
|
||||
selectors: MutableMapping[str, Any]
|
||||
disabled: List[CompileResultNode]
|
||||
files: MutableMapping[str, SourceFile]
|
||||
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
|
||||
flat_graph: Dict[str, Any] = field(default_factory=dict)
|
||||
state_check: Optional[ManifestStateCheck] = None
|
||||
# Moved from the ParseResult object
|
||||
macro_patches: MutableMapping[MacroKey, ParsedMacroPatch] = field(default_factory=dict)
|
||||
patches: MutableMapping[str, ParsedNodePatch] = field(default_factory=dict)
|
||||
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
|
||||
# following is from ParseResult
|
||||
_disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
|
||||
_docs_cache: Optional[DocCache] = None
|
||||
_sources_cache: Optional[SourceCache] = None
|
||||
_refs_cache: Optional[RefableCache] = None
|
||||
_lock: Lock = field(default_factory=flags.MP_CONTEXT.Lock)
|
||||
|
||||
def sync_update_node(
|
||||
self, new_node: NonSourceCompiledNode
|
||||
) -> NonSourceCompiledNode:
|
||||
"""update the node with a lock. The only time we should want to lock is
|
||||
when compiling an ephemeral ancestor of a node at runtime, because
|
||||
multiple threads could be just-in-time compiling the same ephemeral
|
||||
dependency, and we want them to have a consistent view of the manifest.
|
||||
|
||||
If the existing node is not compiled, update it with the new node and
|
||||
return that. If the existing node is compiled, do not update the
|
||||
manifest and return the existing node.
|
||||
"""
|
||||
with self._lock:
|
||||
existing = self.nodes[new_node.unique_id]
|
||||
if getattr(existing, 'compiled', False):
|
||||
# already compiled -> must be a NonSourceCompiledNode
|
||||
return cast(NonSourceCompiledNode, existing)
|
||||
_update_into(self.nodes, new_node)
|
||||
return new_node
|
||||
|
||||
def update_exposure(self, new_exposure: ParsedExposure):
|
||||
_update_into(self.exposures, new_exposure)
|
||||
|
||||
def update_node(self, new_node: ManifestNode):
|
||||
_update_into(self.nodes, new_node)
|
||||
|
||||
def update_source(self, new_source: ParsedSourceDefinition):
|
||||
_update_into(self.sources, new_source)
|
||||
|
||||
def build_flat_graph(self):
|
||||
"""This attribute is used in context.common by each node, so we want to
|
||||
only build it once and avoid any concurrency issues around it.
|
||||
Make sure you don't call this until you're done with building your
|
||||
manifest!
|
||||
"""
|
||||
self.flat_graph = {
|
||||
'exposures': {
|
||||
k: v.to_dict(omit_none=False)
|
||||
for k, v in self.exposures.items()
|
||||
},
|
||||
'nodes': {
|
||||
k: v.to_dict(omit_none=False)
|
||||
for k, v in self.nodes.items()
|
||||
},
|
||||
'sources': {
|
||||
k: v.to_dict(omit_none=False)
|
||||
for k, v in self.sources.items()
|
||||
}
|
||||
}
|
||||
|
||||
def find_disabled_by_name(
|
||||
self, name: str, package: Optional[str] = None
|
||||
) -> Optional[ManifestNode]:
|
||||
searcher: NameSearcher = NameSearcher(
|
||||
name, package, NodeType.refable()
|
||||
)
|
||||
result = searcher.search(self.disabled)
|
||||
return result
|
||||
|
||||
def find_disabled_source_by_name(
|
||||
self, source_name: str, table_name: str, package: Optional[str] = None
|
||||
) -> Optional[ParsedSourceDefinition]:
|
||||
search_name = f'{source_name}.{table_name}'
|
||||
searcher: NameSearcher = NameSearcher(
|
||||
search_name, package, [NodeType.Source]
|
||||
)
|
||||
result = searcher.search(self.disabled)
|
||||
if result is not None:
|
||||
assert isinstance(result, ParsedSourceDefinition)
|
||||
return result
|
||||
|
||||
def _materialization_candidates_for(
|
||||
self, project_name: str,
|
||||
materialization_name: str,
|
||||
adapter_type: Optional[str],
|
||||
) -> CandidateList:
|
||||
|
||||
if adapter_type is None:
|
||||
specificity = Specificity.Default
|
||||
else:
|
||||
specificity = Specificity.Adapter
|
||||
|
||||
full_name = dbt.utils.get_materialization_macro_name(
|
||||
materialization_name=materialization_name,
|
||||
adapter_type=adapter_type,
|
||||
with_prefix=False,
|
||||
)
|
||||
return CandidateList(
|
||||
MaterializationCandidate.from_macro(m, specificity)
|
||||
for m in self._find_macros_by_name(full_name, project_name)
|
||||
)
|
||||
|
||||
def find_materialization_macro_by_name(
|
||||
self, project_name: str, materialization_name: str, adapter_type: str
|
||||
) -> Optional[ParsedMacro]:
|
||||
@@ -638,7 +654,7 @@ class Manifest:
|
||||
|
||||
def get_resource_fqns(self) -> Mapping[str, PathSet]:
|
||||
resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
|
||||
all_resources = chain(self.nodes.values(), self.sources.values())
|
||||
all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
|
||||
for resource in all_resources:
|
||||
resource_type_plural = resource.resource_type.pluralize()
|
||||
if resource_type_plural not in resource_fqns:
|
||||
@@ -657,26 +673,51 @@ class Manifest:
|
||||
if node.resource_type in NodeType.refable():
|
||||
self._refs_cache.add_node(node)
|
||||
|
||||
def patch_macros(
|
||||
self, patches: MutableMapping[MacroKey, ParsedMacroPatch]
|
||||
def add_patch(
|
||||
self, source_file: SourceFile, patch: ParsedNodePatch,
|
||||
) -> None:
|
||||
# patches can't be overwritten
|
||||
if patch.name in self.patches:
|
||||
raise_duplicate_patch_name(patch, self.patches[patch.name])
|
||||
self.patches[patch.name] = patch
|
||||
self.get_file(source_file).patches.append(patch.name)
|
||||
|
||||
def add_macro_patch(
|
||||
self, source_file: SourceFile, patch: ParsedMacroPatch,
|
||||
) -> None:
|
||||
# macros are fully namespaced
|
||||
key = (patch.package_name, patch.name)
|
||||
if key in self.macro_patches:
|
||||
raise_duplicate_macro_patch_name(patch, self.macro_patches[key])
|
||||
self.macro_patches[key] = patch
|
||||
self.get_file(source_file).macro_patches.append(key)
|
||||
|
||||
def add_source_patch(
|
||||
self, source_file: SourceFile, patch: SourcePatch,
|
||||
) -> None:
|
||||
# source patches must be unique
|
||||
key = (patch.overrides, patch.name)
|
||||
if key in self.source_patches:
|
||||
raise_duplicate_source_patch_name(patch, self.source_patches[key])
|
||||
self.source_patches[key] = patch
|
||||
self.get_file(source_file).source_patches.append(key)
|
||||
|
||||
def patch_macros(self) -> None:
|
||||
for macro in self.macros.values():
|
||||
key = (macro.package_name, macro.name)
|
||||
patch = patches.pop(key, None)
|
||||
patch = self.macro_patches.pop(key, None)
|
||||
if not patch:
|
||||
continue
|
||||
macro.patch(patch)
|
||||
|
||||
if patches:
|
||||
for patch in patches.values():
|
||||
if self.macro_patches:
|
||||
for patch in self.macro_patches.values():
|
||||
warn_or_error(
|
||||
f'WARNING: Found documentation for macro "{patch.name}" '
|
||||
f'which was not found'
|
||||
)
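patch_macros above (and patch_nodes below) follow the same consume-and-warn pattern: pop a patch keyed by name while walking the collection, then warn about anything left over. A stripped-down sketch of that pattern, with plain dicts and a print standing in for warn_or_error:

```python
def apply_patches(items: dict, patches: dict) -> None:
    # items: name -> object with a 'description'; patches: name -> new description
    for name, item in items.items():
        patch = patches.pop(name, None)
        if patch is None:
            continue
        item["description"] = patch
    # anything still in 'patches' had no matching item
    for leftover in patches:
        print(f'WARNING: found a patch for "{leftover}" but no matching item')


items = {"my_model": {"description": ""}}
apply_patches(items, {"my_model": "documented", "ghost_model": "orphan docs"})
print(items)  # {'my_model': {'description': 'documented'}}
```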
|
||||
|
||||
def patch_nodes(
|
||||
self, patches: MutableMapping[str, ParsedNodePatch]
|
||||
) -> None:
|
||||
def patch_nodes(self) -> None:
|
||||
"""Patch nodes with the given dict of patches. Note that this consumes
|
||||
the input!
|
||||
This relies on the fact that all nodes have unique _name_ fields, not
|
||||
@@ -686,8 +727,10 @@ class Manifest:
|
||||
# only have the node name in the patch, we have to iterate over all the
|
||||
# nodes looking for matching names. We could use a NameSearcher if we
|
||||
# were ok with doing an O(n*m) search (one nodes scan per patch)
|
||||
# Q: could we save patches by node unique_ids instead, or convert
|
||||
# between names and node ids?
|
||||
for node in self.nodes.values():
|
||||
patch = patches.pop(node.name, None)
|
||||
patch = self.patches.pop(node.name, None)
|
||||
if not patch:
|
||||
continue
|
||||
|
||||
@@ -705,9 +748,10 @@ class Manifest:
|
||||
|
||||
node.patch(patch)
|
||||
|
||||
# log debug-level warning about nodes we couldn't find
|
||||
if patches:
|
||||
for patch in patches.values():
|
||||
# If anything is left in self.patches, it means that the node for
|
||||
# that patch wasn't found.
|
||||
if self.patches:
|
||||
for patch in self.patches.values():
|
||||
# since patches aren't nodes, we can't use the existing
|
||||
# target_not_found warning
|
||||
logger.debug((
|
||||
@@ -728,6 +772,7 @@ class Manifest:
|
||||
chain(self.nodes.values(), self.sources.values())
|
||||
)
|
||||
|
||||
# This is used in dbt.task.rpc.sql_commands 'add_new_refs'
|
||||
def deepcopy(self):
|
||||
return Manifest(
|
||||
nodes={k: _deepcopy(v) for k, v in self.nodes.items()},
|
||||
@@ -735,10 +780,11 @@ class Manifest:
|
||||
macros={k: _deepcopy(v) for k, v in self.macros.items()},
|
||||
docs={k: _deepcopy(v) for k, v in self.docs.items()},
|
||||
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
|
||||
selectors=self.root_project.manifest_selectors,
|
||||
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
|
||||
metadata=self.metadata,
|
||||
disabled=[_deepcopy(n) for n in self.disabled],
|
||||
files={k: _deepcopy(v) for k, v in self.files.items()},
|
||||
state_check=_deepcopy(self.state_check),
|
||||
)
|
||||
|
||||
def writable_manifest(self):
|
||||
@@ -762,10 +808,10 @@ class Manifest:
|
||||
parent_map=backward_edges,
|
||||
)
|
||||
|
||||
def to_dict(self, omit_none=True, validate=False):
|
||||
return self.writable_manifest().to_dict(
|
||||
omit_none=omit_none, validate=validate
|
||||
)
|
||||
    # When 'to_dict' is called on the Manifest, it substitutes a
    # WritableManifest
    def __pre_serialize__(self):
        return self.writable_manifest()

    def write(self, path):
        self.writable_manifest().write(path)
@@ -887,6 +933,7 @@ class Manifest:
|
||||
|
||||
def merge_from_artifact(
|
||||
self,
|
||||
adapter,
|
||||
other: 'WritableManifest',
|
||||
selected: AbstractSet[UniqueID],
|
||||
) -> None:
|
||||
@@ -898,10 +945,14 @@ class Manifest:
|
||||
refables = set(NodeType.refable())
|
||||
merged = set()
|
||||
for unique_id, node in other.nodes.items():
|
||||
if (
|
||||
current = self.nodes.get(unique_id)
|
||||
if current and (
|
||||
node.resource_type in refables and
|
||||
not node.is_ephemeral and
|
||||
unique_id not in selected
|
||||
unique_id not in selected and
|
||||
not adapter.get_relation(
|
||||
current.database, current.schema, current.identifier
|
||||
)
|
||||
):
|
||||
merged.add(unique_id)
|
||||
self.nodes[unique_id] = node.replace(deferred=True)
|
||||
@@ -912,6 +963,212 @@ class Manifest:
|
||||
f'Merged {len(merged)} items from state (sample: {sample})'
|
||||
)
|
||||
|
||||
# Methods that were formerly in ParseResult
|
||||
def get_file(self, source_file: SourceFile) -> SourceFile:
|
||||
key = source_file.search_key
|
||||
if key is None:
|
||||
return source_file
|
||||
if key not in self.files:
|
||||
self.files[key] = source_file
|
||||
return self.files[key]
|
||||
|
||||
def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
|
||||
if macro.unique_id in self.macros:
|
||||
# detect that the macro exists and emit an error
|
||||
other_path = self.macros[macro.unique_id].original_file_path
|
||||
# subtract 2 for the "Compilation Error" indent
|
||||
# note that the line wrap eats newlines, so if you want newlines,
|
||||
# this is the result :(
|
||||
msg = line_wrap_message(
|
||||
f'''\
|
||||
dbt found two macros named "{macro.name}" in the project
|
||||
"{macro.package_name}".
|
||||
|
||||
|
||||
To fix this error, rename or remove one of the following
|
||||
macros:
|
||||
|
||||
- {macro.original_file_path}
|
||||
|
||||
- {other_path}
|
||||
''',
|
||||
subtract=2
|
||||
)
|
||||
raise_compiler_error(msg)
|
||||
|
||||
self.macros[macro.unique_id] = macro
|
||||
self.get_file(source_file).macros.append(macro.unique_id)
|
||||
|
||||
def has_file(self, source_file: SourceFile) -> bool:
|
||||
key = source_file.search_key
|
||||
if key is None:
|
||||
return False
|
||||
if key not in self.files:
|
||||
return False
|
||||
my_checksum = self.files[key].checksum
|
||||
return my_checksum == source_file.checksum
|
||||
|
||||
def add_source(
|
||||
self, source_file: SourceFile, source: UnpatchedSourceDefinition
|
||||
):
|
||||
# sources can't be overwritten!
|
||||
_check_duplicates(source, self.sources)
|
||||
self.sources[source.unique_id] = source # type: ignore
|
||||
self.get_file(source_file).sources.append(source.unique_id)
|
||||
|
||||
def add_node_nofile(self, node: ManifestNodes):
|
||||
# nodes can't be overwritten!
|
||||
_check_duplicates(node, self.nodes)
|
||||
self.nodes[node.unique_id] = node
|
||||
|
||||
def add_node(self, source_file: SourceFile, node: ManifestNodes):
|
||||
self.add_node_nofile(node)
|
||||
self.get_file(source_file).nodes.append(node.unique_id)
|
||||
|
||||
def add_exposure(self, source_file: SourceFile, exposure: ParsedExposure):
|
||||
_check_duplicates(exposure, self.exposures)
|
||||
self.exposures[exposure.unique_id] = exposure
|
||||
self.get_file(source_file).exposures.append(exposure.unique_id)
|
||||
|
||||
def add_disabled_nofile(self, node: CompileResultNode):
|
||||
if node.unique_id in self._disabled:
|
||||
self._disabled[node.unique_id].append(node)
|
||||
else:
|
||||
self._disabled[node.unique_id] = [node]
|
||||
|
||||
def add_disabled(self, source_file: SourceFile, node: CompileResultNode):
|
||||
self.add_disabled_nofile(node)
|
||||
self.get_file(source_file).nodes.append(node.unique_id)
|
||||
|
||||
def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
|
||||
_check_duplicates(doc, self.docs)
|
||||
self.docs[doc.unique_id] = doc
|
||||
self.get_file(source_file).docs.append(doc.unique_id)
|
||||
|
||||
def _get_disabled(
|
||||
self,
|
||||
unique_id: str,
|
||||
match_file: SourceFile,
|
||||
) -> List[CompileResultNode]:
|
||||
if unique_id not in self._disabled:
|
||||
raise InternalException(
|
||||
'called _get_disabled with id={}, but it does not exist'
|
||||
.format(unique_id)
|
||||
)
|
||||
return [
|
||||
n for n in self._disabled[unique_id]
|
||||
if n.original_file_path == match_file.path.original_file_path
|
||||
]
|
||||
|
||||
# This is only used by 'sanitized_update' which processes "old_manifest"
|
||||
def _process_node(
|
||||
self,
|
||||
node_id: str,
|
||||
source_file: SourceFile,
|
||||
old_file: SourceFile,
|
||||
old_manifest: Any,
|
||||
) -> None:
|
||||
"""Nodes are a special kind of complicated - there can be multiple
|
||||
with the same name, as long as all but one are disabled.
|
||||
|
||||
Only handle nodes where the matching node has the same resource type
|
||||
as the current parser.
|
||||
"""
|
||||
source_path = source_file.path.original_file_path
|
||||
found: bool = False
|
||||
if node_id in old_manifest.nodes:
|
||||
old_node = old_manifest.nodes[node_id]
|
||||
if old_node.original_file_path == source_path:
|
||||
self.add_node(source_file, old_node)
|
||||
found = True
|
||||
|
||||
if node_id in old_manifest._disabled:
|
||||
matches = old_manifest._get_disabled(node_id, source_file)
|
||||
for match in matches:
|
||||
self.add_disabled(source_file, match)
|
||||
found = True
|
||||
|
||||
if not found:
|
||||
raise CompilationException(
|
||||
'Expected to find "{}" in cached "manifest.nodes" or '
|
||||
'"manifest.disabled" based on cached file information: {}!'
|
||||
.format(node_id, old_file)
|
||||
)
|
||||
|
||||
# This is called by ManifestLoader._get_cached/parse_with_cache,
|
||||
# which handles updating the ManifestLoader results with information
|
||||
# from the "old_manifest", i.e. the pickle file if the checksums are
|
||||
# the same.
|
||||
def sanitized_update(
|
||||
self,
|
||||
source_file: SourceFile,
|
||||
old_manifest: Any,
|
||||
resource_type: NodeType,
|
||||
) -> bool:
|
||||
|
||||
if isinstance(source_file.path, RemoteFile):
|
||||
return False
|
||||
|
||||
old_file = old_manifest.get_file(source_file)
|
||||
for doc_id in old_file.docs:
|
||||
doc = _expect_value(doc_id, old_manifest.docs, old_file, "docs")
|
||||
self.add_doc(source_file, doc)
|
||||
|
||||
for macro_id in old_file.macros:
|
||||
macro = _expect_value(
|
||||
macro_id, old_manifest.macros, old_file, "macros"
|
||||
)
|
||||
self.add_macro(source_file, macro)
|
||||
|
||||
for source_id in old_file.sources:
|
||||
source = _expect_value(
|
||||
source_id, old_manifest.sources, old_file, "sources"
|
||||
)
|
||||
self.add_source(source_file, source)
|
||||
|
||||
# because we know this is how we _parsed_ the node, we can safely
|
||||
# assume if it's disabled it was done by the project or file, and
|
||||
# we can keep our old data
|
||||
# the node ID could be in old_manifest.disabled AND in old_manifest.nodes.
|
||||
# In that case, we have to make sure the path also matches.
|
||||
for node_id in old_file.nodes:
|
||||
# cheat: look at the first part of the node ID and compare it to
|
||||
# the parser resource type. On a mismatch, bail out.
|
||||
if resource_type != node_id.split('.')[0]:
|
||||
continue
|
||||
self._process_node(node_id, source_file, old_file, old_manifest)
|
||||
|
||||
for exposure_id in old_file.exposures:
|
||||
exposure = _expect_value(
|
||||
exposure_id, old_manifest.exposures, old_file, "exposures"
|
||||
)
|
||||
self.add_exposure(source_file, exposure)
|
||||
|
||||
# Note: There shouldn't be any patches in here after the cleanup.
|
||||
# The pickled Manifest should have had all patches applied.
|
||||
patched = False
|
||||
for name in old_file.patches:
|
||||
patch = _expect_value(
|
||||
name, old_manifest.patches, old_file, "patches"
|
||||
)
|
||||
self.add_patch(source_file, patch)
|
||||
patched = True
|
||||
if patched:
|
||||
self.get_file(source_file).patches.sort()
|
||||
|
||||
macro_patched = False
|
||||
for key in old_file.macro_patches:
|
||||
macro_patch = _expect_value(
|
||||
key, old_manifest.macro_patches, old_file, "macro_patches"
|
||||
)
|
||||
self.add_macro_patch(source_file, macro_patch)
|
||||
macro_patched = True
|
||||
if macro_patched:
|
||||
self.get_file(source_file).macro_patches.sort()
|
||||
|
||||
return True
|
||||
# end of methods formerly in ParseResult
|
||||
|
||||
# Provide support for copy.deepcopy() - we just need to avoid the lock!
|
||||
# pickle and deepcopy use this. It returns a callable object used to
|
||||
# create the initial version of the object and a tuple of arguments
|
||||
@@ -931,6 +1188,11 @@ class Manifest:
|
||||
self.files,
|
||||
self.metadata,
|
||||
self.flat_graph,
|
||||
self.state_check,
|
||||
self.macro_patches,
|
||||
self.patches,
|
||||
self.source_patches,
|
||||
self._disabled,
|
||||
self._docs_cache,
|
||||
self._sources_cache,
|
||||
self._refs_cache,
|
||||
@@ -938,6 +1200,18 @@ class Manifest:
|
||||
return self.__class__, args
|
||||
|
||||
|
||||
class MacroManifest(MacroMethods):
|
||||
def __init__(self, macros):
|
||||
self.macros = macros
|
||||
self.metadata = ManifestMetadata()
|
||||
# This is returned by the 'graph' context property
|
||||
# in the ProviderContext class.
|
||||
self.flat_graph = {}
|
||||
|
||||
|
||||
AnyManifest = Union[Manifest, MacroManifest]
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('manifest', 1)
|
||||
class WritableManifest(ArtifactMixin):
|
||||
@@ -983,3 +1257,22 @@ class WritableManifest(ArtifactMixin):
|
||||
metadata: ManifestMetadata = field(metadata=dict(
|
||||
description='Metadata about the manifest',
|
||||
))
|
||||
|
||||
|
||||
def _check_duplicates(
|
||||
value: HasUniqueID, src: Mapping[str, HasUniqueID]
|
||||
):
|
||||
if value.unique_id in src:
|
||||
raise_duplicate_resource_name(value, src[value.unique_id])
|
||||
|
||||
|
||||
def _expect_value(
|
||||
key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
|
||||
) -> V_T:
|
||||
if key not in src:
|
||||
raise CompilationException(
|
||||
'Expected to find "{}" in cached "result.{}" based '
|
||||
'on cached file information: {}!'
|
||||
.format(key, name, old_file)
|
||||
)
|
||||
return src[key]
|
||||
|
||||
@@ -2,19 +2,12 @@ from dataclasses import field, Field, dataclass
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from typing import (
|
||||
Any, List, Optional, Dict, MutableMapping, Union, Type, NewType, Tuple,
|
||||
TypeVar, Callable
|
||||
Any, List, Optional, Dict, MutableMapping, Union, Type,
|
||||
TypeVar, Callable,
|
||||
)
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, ValidationError, register_pattern,
|
||||
)
|
||||
|
||||
# TODO: patch+upgrade hologram to avoid this jsonschema import
|
||||
import jsonschema # type: ignore
|
||||
|
||||
# This is protected, but we really do want to reuse this logic, and the cache!
|
||||
# It would be nice to move the custom error picking stuff into hologram!
|
||||
from hologram import _validate_schema
|
||||
from hologram import JsonSchemaMixin, ValidationError
|
||||
from hologram.helpers import StrEnum, register_pattern
|
||||
|
||||
from dbt.contracts.graph.unparsed import AdditionalPropertiesAllowed
|
||||
from dbt.exceptions import CompilationException, InternalException
|
||||
from dbt.contracts.util import Replaceable, list_str
|
||||
@@ -170,22 +163,15 @@ def insensitive_patterns(*patterns: str):
|
||||
return '^({})$'.format('|'.join(lowercased))
|
||||
|
||||
|
||||
Severity = NewType('Severity', str)
|
||||
class Severity(str):
|
||||
pass
|
||||
|
||||
|
||||
register_pattern(Severity, insensitive_patterns('warn', 'error'))
|
||||
|
||||
|
||||
class SnapshotStrategy(StrEnum):
|
||||
Timestamp = 'timestamp'
|
||||
Check = 'check'
|
||||
|
||||
|
||||
class All(StrEnum):
|
||||
All = 'all'
|
||||
|
||||
|
||||
@dataclass
|
||||
class Hook(JsonSchemaMixin, Replaceable):
|
||||
class Hook(dbtClassMixin, Replaceable):
|
||||
sql: str
|
||||
transaction: bool = True
|
||||
index: Optional[int] = None
|
||||
@@ -313,29 +299,6 @@ class BaseConfig(
|
||||
)
|
||||
return result
|
||||
|
||||
def to_dict(
|
||||
self,
|
||||
omit_none: bool = True,
|
||||
validate: bool = False,
|
||||
*,
|
||||
omit_hidden: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
result = super().to_dict(omit_none=omit_none, validate=validate)
|
||||
if omit_hidden and not omit_none:
|
||||
for fld, target_field in self._get_fields():
|
||||
if target_field not in result:
|
||||
continue
|
||||
|
||||
# if the field is not None, preserve it regardless of the
|
||||
# setting. This is in line with existing behavior, but isn't
|
||||
# an endorsement of it!
|
||||
if result[target_field] is not None:
|
||||
continue
|
||||
|
||||
if not ShowBehavior.should_show(fld):
|
||||
del result[target_field]
|
||||
return result
|
||||
|
||||
def update_from(
|
||||
self: T, data: Dict[str, Any], adapter_type: str, validate: bool = True
|
||||
) -> T:
|
||||
@@ -344,7 +307,7 @@ class BaseConfig(
|
||||
"""
|
||||
# sadly, this is a circular import
|
||||
from dbt.adapters.factory import get_config_class_by_name
|
||||
dct = self.to_dict(omit_none=False, validate=False, omit_hidden=False)
|
||||
dct = self.to_dict(omit_none=False)
|
||||
|
||||
adapter_config_cls = get_config_class_by_name(adapter_type)
|
||||
|
||||
@@ -358,21 +321,23 @@ class BaseConfig(
|
||||
dct.update(data)
|
||||
|
||||
# any validation failures must have come from the update
|
||||
return self.from_dict(dct, validate=validate)
|
||||
if validate:
|
||||
self.validate(dct)
|
||||
return self.from_dict(dct)
|
||||
|
||||
def finalize_and_validate(self: T) -> T:
|
||||
# from_dict will validate for us
|
||||
dct = self.to_dict(omit_none=False, validate=False)
|
||||
dct = self.to_dict(omit_none=False)
|
||||
self.validate(dct)
|
||||
return self.from_dict(dct)
|
||||
|
||||
def replace(self, **kwargs):
|
||||
dct = self.to_dict(validate=False)
|
||||
dct = self.to_dict(omit_none=True)
|
||||
|
||||
mapping = self.field_mapping()
|
||||
for key, value in kwargs.items():
|
||||
new_key = mapping.get(key, key)
|
||||
dct[new_key] = value
|
||||
return self.from_dict(dct, validate=False)
|
||||
return self.from_dict(dct)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -431,12 +396,33 @@ class NodeConfig(BaseConfig):
|
||||
full_refresh: Optional[bool] = None
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data, validate=True):
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
field_map = {'post-hook': 'post_hook', 'pre-hook': 'pre_hook'}
|
||||
# create a new dict because otherwise it gets overwritten in
|
||||
# tests
|
||||
new_dict = {}
|
||||
for key in data:
|
||||
new_dict[key] = data[key]
|
||||
data = new_dict
|
||||
for key in hooks.ModelHookType:
|
||||
if key in data:
|
||||
data[key] = [hooks.get_hook_dict(h) for h in data[key]]
|
||||
return super().from_dict(data, validate=validate)
|
||||
for field_name in field_map:
|
||||
if field_name in data:
|
||||
new_name = field_map[field_name]
|
||||
data[new_name] = data.pop(field_name)
|
||||
return data
|
||||
|
||||
def __post_serialize__(self, dct):
|
||||
dct = super().__post_serialize__(dct)
|
||||
field_map = {'post_hook': 'post-hook', 'pre_hook': 'pre-hook'}
|
||||
for field_name in field_map:
|
||||
if field_name in dct:
|
||||
dct[field_map[field_name]] = dct.pop(field_name)
|
||||
return dct
|
||||
|
||||
# this is still used by jsonschema validation
|
||||
@classmethod
|
||||
def field_mapping(cls):
|
||||
return {'post_hook': 'post-hook', 'pre_hook': 'pre-hook'}
|
||||
@@ -450,184 +436,53 @@ class SeedConfig(NodeConfig):
|
||||
|
||||
@dataclass
|
||||
class TestConfig(NodeConfig):
|
||||
materialized: str = 'test'
|
||||
severity: Severity = Severity('ERROR')
|
||||
|
||||
|
||||
SnapshotVariants = Union[
|
||||
'TimestampSnapshotConfig',
|
||||
'CheckSnapshotConfig',
|
||||
'GenericSnapshotConfig',
|
||||
]
|
||||
|
||||
|
||||
def _relevance_without_strategy(error: jsonschema.ValidationError):
|
||||
# calculate the 'relevance' of an error the normal jsonschema way, except
|
||||
# if the validator is in the 'strategy' field and its conflicting with the
|
||||
# 'enum'. This suppresses `"'timestamp' is not one of ['check']` and such
|
||||
if 'strategy' in error.path and error.validator in {'enum', 'not'}:
|
||||
length = 1
|
||||
else:
|
||||
length = -len(error.path)
|
||||
validator = error.validator
|
||||
return length, validator not in {'anyOf', 'oneOf'}
|
||||
|
||||
|
||||
@dataclass
|
||||
class SnapshotWrapper(JsonSchemaMixin):
|
||||
"""This is a little wrapper to let us serialize/deserialize the
|
||||
SnapshotVariants union.
|
||||
"""
|
||||
config: SnapshotVariants # mypy: ignore
|
||||
|
||||
@classmethod
|
||||
def validate(cls, data: Any):
|
||||
config = data.get('config', {})
|
||||
|
||||
if config.get('strategy') == 'check':
|
||||
schema = _validate_schema(CheckSnapshotConfig)
|
||||
to_validate = config
|
||||
|
||||
elif config.get('strategy') == 'timestamp':
|
||||
schema = _validate_schema(TimestampSnapshotConfig)
|
||||
to_validate = config
|
||||
|
||||
else:
|
||||
schema = _validate_schema(cls)
|
||||
to_validate = data
|
||||
|
||||
validator = jsonschema.Draft7Validator(schema)
|
||||
|
||||
error = jsonschema.exceptions.best_match(
|
||||
validator.iter_errors(to_validate),
|
||||
key=_relevance_without_strategy,
|
||||
)
|
||||
|
||||
if error is not None:
|
||||
raise ValidationError.create_from(error) from error
|
||||
|
||||
|
||||
@dataclass
|
||||
class EmptySnapshotConfig(NodeConfig):
|
||||
materialized: str = 'snapshot'
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
@dataclass
|
||||
class SnapshotConfig(EmptySnapshotConfig):
|
||||
unique_key: str = field(init=False, metadata=dict(init_required=True))
|
||||
target_schema: str = field(init=False, metadata=dict(init_required=True))
|
||||
strategy: Optional[str] = None
|
||||
unique_key: Optional[str] = None
|
||||
target_schema: Optional[str] = None
|
||||
target_database: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
unique_key: str,
|
||||
target_schema: str,
|
||||
target_database: Optional[str] = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
self.unique_key = unique_key
|
||||
self.target_schema = target_schema
|
||||
self.target_database = target_database
|
||||
# kwargs['materialized'] = materialized
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# type hacks...
|
||||
@classmethod
|
||||
def _get_fields(cls) -> List[Tuple[Field, str]]: # type: ignore
|
||||
fields: List[Tuple[Field, str]] = []
|
||||
for old_field, name in super()._get_fields():
|
||||
new_field = old_field
|
||||
# tell hologram we're really an initvar
|
||||
if old_field.metadata and old_field.metadata.get('init_required'):
|
||||
new_field = field(init=True, metadata=old_field.metadata)
|
||||
new_field.name = old_field.name
|
||||
new_field.type = old_field.type
|
||||
new_field._field_type = old_field._field_type # type: ignore
|
||||
fields.append((new_field, name))
|
||||
return fields
|
||||
|
||||
def finalize_and_validate(self: 'SnapshotConfig') -> SnapshotVariants:
|
||||
data = self.to_dict()
|
||||
return SnapshotWrapper.from_dict({'config': data}).config
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class GenericSnapshotConfig(SnapshotConfig):
|
||||
strategy: str = field(init=False, metadata=dict(init_required=True))
|
||||
|
||||
def __init__(self, strategy: str, **kwargs) -> None:
|
||||
self.strategy = strategy
|
||||
super().__init__(**kwargs)
|
||||
updated_at: Optional[str] = None
|
||||
check_cols: Optional[Union[str, List[str]]] = None
|
||||
|
||||
@classmethod
|
||||
def _collect_json_schema(
|
||||
cls, definitions: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
# this is the method you want to override in hologram if you want
|
||||
# to do clever things about the json schema and have classes that
|
||||
# contain instances of your JsonSchemaMixin respect the change.
|
||||
schema = super()._collect_json_schema(definitions)
|
||||
def validate(cls, data):
|
||||
super().validate(data)
|
||||
if data.get('strategy') == 'check':
|
||||
if not data.get('check_cols'):
|
||||
raise ValidationError(
|
||||
"A snapshot configured with the check strategy must "
|
||||
"specify a check_cols configuration.")
|
||||
if (isinstance(data['check_cols'], str) and
|
||||
data['check_cols'] != 'all'):
|
||||
raise ValidationError(
|
||||
f"Invalid value for 'check_cols': {data['check_cols']}. "
|
||||
"Expected 'all' or a list of strings.")
|
||||
|
||||
# Instead of just the strategy we'd calculate normally, say
|
||||
# "this strategy except none of our specialization strategies".
|
||||
strategies = [schema['properties']['strategy']]
|
||||
for specialization in (TimestampSnapshotConfig, CheckSnapshotConfig):
|
||||
strategies.append(
|
||||
{'not': specialization.json_schema()['properties']['strategy']}
|
||||
)
|
||||
elif data.get('strategy') == 'timestamp':
|
||||
if not data.get('updated_at'):
|
||||
raise ValidationError(
|
||||
"A snapshot configured with the timestamp strategy "
|
||||
"must specify an updated_at configuration.")
|
||||
if data.get('check_cols'):
|
||||
raise ValidationError(
|
||||
"A 'timestamp' snapshot should not have 'check_cols'")
|
||||
# If the strategy is not 'check' or 'timestamp' it's a custom strategy,
|
||||
# formerly supported with GenericSnapshotConfig
|
||||
|
||||
schema['properties']['strategy'] = {
|
||||
'allOf': strategies
|
||||
}
|
||||
return schema
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class TimestampSnapshotConfig(SnapshotConfig):
|
||||
strategy: str = field(
|
||||
init=False,
|
||||
metadata=dict(
|
||||
restrict=[str(SnapshotStrategy.Timestamp)],
|
||||
init_required=True,
|
||||
),
|
||||
)
|
||||
updated_at: str = field(init=False, metadata=dict(init_required=True))
|
||||
|
||||
def __init__(
|
||||
self, strategy: str, updated_at: str, **kwargs
|
||||
) -> None:
|
||||
self.strategy = strategy
|
||||
self.updated_at = updated_at
|
||||
super().__init__(**kwargs)
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class CheckSnapshotConfig(SnapshotConfig):
|
||||
strategy: str = field(
|
||||
init=False,
|
||||
metadata=dict(
|
||||
restrict=[str(SnapshotStrategy.Check)],
|
||||
init_required=True,
|
||||
),
|
||||
)
|
||||
# TODO: is there a way to get this to accept tuples of strings? Adding
|
||||
# `Tuple[str, ...]` to the list of types results in this:
|
||||
# ['email'] is valid under each of {'type': 'array', 'items':
|
||||
# {'type': 'string'}}, {'type': 'array', 'items': {'type': 'string'}}
|
||||
# but without it, parsing gets upset about values like `('email',)`
|
||||
# maybe hologram itself should support this behavior? It's not like tuples
|
||||
# are meaningful in json
|
||||
check_cols: Union[All, List[str]] = field(
|
||||
init=False,
|
||||
metadata=dict(init_required=True),
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self, strategy: str, check_cols: Union[All, List[str]],
|
||||
**kwargs
|
||||
) -> None:
|
||||
self.strategy = strategy
|
||||
self.check_cols = check_cols
|
||||
super().__init__(**kwargs)
|
||||
def finalize_and_validate(self):
|
||||
data = self.to_dict(omit_none=True)
|
||||
self.validate(data)
|
||||
return self.from_dict(data)
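To make the strategy rules in the new validate() above concrete, these are the shapes of config dicts it accepts or rejects (illustrative values only; column names and the custom strategy name are placeholders, and other required snapshot fields are omitted):

```python
# Accepted: 'check' strategy with check_cols as a list or the literal 'all'
check_ok = {"strategy": "check", "check_cols": ["status", "email"]}
check_all = {"strategy": "check", "check_cols": "all"}

# Rejected: 'check' without check_cols, or check_cols set to some other string
check_missing = {"strategy": "check"}
check_bad_str = {"strategy": "check", "check_cols": "some_column"}

# Accepted: 'timestamp' strategy with updated_at; rejected if check_cols is also set
ts_ok = {"strategy": "timestamp", "updated_at": "updated_at"}
ts_bad = {"strategy": "timestamp", "updated_at": "updated_at", "check_cols": ["x"]}

# Any other strategy name is treated as a custom strategy and passes these checks
custom = {"strategy": "my_custom_strategy"}
```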
|
||||
|
||||
|
||||
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
|
||||
|
||||
@@ -13,8 +13,9 @@ from typing import (
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import ExtensibleJsonSchemaMixin
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, ExtensibleDbtClassMixin
|
||||
)
|
||||
|
||||
from dbt.clients.system import write_file
|
||||
from dbt.contracts.files import FileHash, MAXIMUM_SEED_SIZE_NAME
|
||||
@@ -38,20 +39,14 @@ from .model_config import (
|
||||
TestConfig,
|
||||
SourceConfig,
|
||||
EmptySnapshotConfig,
|
||||
SnapshotVariants,
|
||||
)
|
||||
# import these 3 so the SnapshotVariants forward ref works.
|
||||
from .model_config import ( # noqa
|
||||
TimestampSnapshotConfig,
|
||||
CheckSnapshotConfig,
|
||||
GenericSnapshotConfig,
|
||||
SnapshotConfig,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ColumnInfo(
|
||||
AdditionalPropertiesMixin,
|
||||
ExtensibleJsonSchemaMixin,
|
||||
ExtensibleDbtClassMixin,
|
||||
Replaceable
|
||||
):
|
||||
name: str
|
||||
@@ -64,7 +59,7 @@ class ColumnInfo(
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasFqn(JsonSchemaMixin, Replaceable):
|
||||
class HasFqn(dbtClassMixin, Replaceable):
|
||||
fqn: List[str]
|
||||
|
||||
def same_fqn(self, other: 'HasFqn') -> bool:
|
||||
@@ -72,12 +67,12 @@ class HasFqn(JsonSchemaMixin, Replaceable):
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasUniqueID(JsonSchemaMixin, Replaceable):
|
||||
class HasUniqueID(dbtClassMixin, Replaceable):
|
||||
unique_id: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class MacroDependsOn(JsonSchemaMixin, Replaceable):
|
||||
class MacroDependsOn(dbtClassMixin, Replaceable):
|
||||
macros: List[str] = field(default_factory=list)
|
||||
|
||||
# 'in' on lists is O(n) so this is O(n^2) for # of macros
|
||||
@@ -96,12 +91,22 @@ class DependsOn(MacroDependsOn):
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasRelationMetadata(JsonSchemaMixin, Replaceable):
|
||||
class HasRelationMetadata(dbtClassMixin, Replaceable):
|
||||
database: Optional[str]
|
||||
schema: str
|
||||
|
||||
# Can't set database to None like it ought to be
|
||||
# because it messes up the subclasses and default parameters
|
||||
# so hack it here
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
if 'database' not in data:
|
||||
data['database'] = None
|
||||
return data
|
||||
|
||||
class ParsedNodeMixins(JsonSchemaMixin):
|
||||
|
||||
class ParsedNodeMixins(dbtClassMixin):
|
||||
resource_type: NodeType
|
||||
depends_on: DependsOn
|
||||
config: NodeConfig
|
||||
@@ -132,8 +137,12 @@ class ParsedNodeMixins(JsonSchemaMixin):
|
||||
self.meta = patch.meta
|
||||
self.docs = patch.docs
|
||||
if flags.STRICT_MODE:
|
||||
assert isinstance(self, JsonSchemaMixin)
|
||||
self.to_dict(validate=True, omit_none=False)
|
||||
# It seems odd that an instance can be invalid
|
||||
# Maybe there should be validation or restrictions
|
||||
# elsewhere?
|
||||
assert isinstance(self, dbtClassMixin)
|
||||
dct = self.to_dict(omit_none=False)
|
||||
self.validate(dct)
|
||||
|
||||
def get_materialization(self):
|
||||
return self.config.materialized
|
||||
@@ -335,14 +344,14 @@ class ParsedSeedNode(ParsedNode):
|
||||
|
||||
|
||||
@dataclass
|
||||
class TestMetadata(JsonSchemaMixin, Replaceable):
|
||||
namespace: Optional[str]
|
||||
class TestMetadata(dbtClassMixin, Replaceable):
|
||||
name: str
|
||||
kwargs: Dict[str, Any]
|
||||
kwargs: Dict[str, Any] = field(default_factory=dict)
|
||||
namespace: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasTestMetadata(JsonSchemaMixin):
|
||||
class HasTestMetadata(dbtClassMixin):
|
||||
test_metadata: TestMetadata
|
||||
|
||||
|
||||
@@ -394,7 +403,7 @@ class IntermediateSnapshotNode(ParsedNode):
|
||||
@dataclass
|
||||
class ParsedSnapshotNode(ParsedNode):
|
||||
resource_type: NodeType = field(metadata={'restrict': [NodeType.Snapshot]})
|
||||
config: SnapshotVariants
|
||||
config: SnapshotConfig
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -443,8 +452,10 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
|
||||
self.docs = patch.docs
|
||||
self.arguments = patch.arguments
|
||||
if flags.STRICT_MODE:
|
||||
assert isinstance(self, JsonSchemaMixin)
|
||||
self.to_dict(validate=True, omit_none=False)
|
||||
# What does this actually validate?
|
||||
assert isinstance(self, dbtClassMixin)
|
||||
dct = self.to_dict(omit_none=False)
|
||||
self.validate(dct)
|
||||
|
||||
def same_contents(self, other: Optional['ParsedMacro']) -> bool:
|
||||
if other is None:
|
||||
@@ -654,9 +665,9 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
|
||||
type: ExposureType
|
||||
owner: ExposureOwner
|
||||
resource_type: NodeType = NodeType.Exposure
|
||||
description: str = ''
|
||||
maturity: Optional[MaturityType] = None
|
||||
url: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
depends_on: DependsOn = field(default_factory=DependsOn)
|
||||
refs: List[List[str]] = field(default_factory=list)
|
||||
sources: List[List[str]] = field(default_factory=list)
|
||||
@@ -709,6 +720,18 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
|
||||
)
|
||||
|
||||
|
||||
ManifestNodes = Union[
|
||||
ParsedAnalysisNode,
|
||||
ParsedDataTestNode,
|
||||
ParsedHookNode,
|
||||
ParsedModelNode,
|
||||
ParsedRPCNode,
|
||||
ParsedSchemaTestNode,
|
||||
ParsedSeedNode,
|
||||
ParsedSnapshotNode,
|
||||
]
|
||||
|
||||
|
||||
ParsedResource = Union[
|
||||
ParsedDocumentation,
|
||||
ParsedMacro,
|
||||
|
||||
@@ -8,8 +8,9 @@ from dbt.contracts.util import (
|
||||
import dbt.helper_types # noqa:F401
|
||||
from dbt.exceptions import CompilationException
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import StrEnum, ExtensibleJsonSchemaMixin
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin
|
||||
)
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import timedelta
|
||||
@@ -18,7 +19,7 @@ from typing import Optional, List, Union, Dict, Any, Sequence
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedBaseNode(JsonSchemaMixin, Replaceable):
|
||||
class UnparsedBaseNode(dbtClassMixin, Replaceable):
|
||||
package_name: str
|
||||
root_path: str
|
||||
path: str
|
||||
@@ -66,12 +67,12 @@ class UnparsedRunHook(UnparsedNode):
|
||||
|
||||
|
||||
@dataclass
|
||||
class Docs(JsonSchemaMixin, Replaceable):
|
||||
class Docs(dbtClassMixin, Replaceable):
|
||||
show: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasDocs(AdditionalPropertiesMixin, ExtensibleJsonSchemaMixin,
|
||||
class HasDocs(AdditionalPropertiesMixin, ExtensibleDbtClassMixin,
|
||||
Replaceable):
|
||||
name: str
|
||||
description: str = ''
|
||||
@@ -100,7 +101,7 @@ class UnparsedColumn(HasTests):
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasColumnDocs(JsonSchemaMixin, Replaceable):
|
||||
class HasColumnDocs(dbtClassMixin, Replaceable):
|
||||
columns: Sequence[HasDocs] = field(default_factory=list)
|
||||
|
||||
|
||||
@@ -110,7 +111,7 @@ class HasColumnTests(HasColumnDocs):
|
||||
|
||||
|
||||
@dataclass
|
||||
class HasYamlMetadata(JsonSchemaMixin):
|
||||
class HasYamlMetadata(dbtClassMixin):
|
||||
original_file_path: str
|
||||
yaml_key: str
|
||||
package_name: str
|
||||
@@ -127,7 +128,7 @@ class UnparsedNodeUpdate(HasColumnTests, HasTests, HasYamlMetadata):
|
||||
|
||||
|
||||
@dataclass
|
||||
class MacroArgument(JsonSchemaMixin):
|
||||
class MacroArgument(dbtClassMixin):
|
||||
name: str
|
||||
type: Optional[str] = None
|
||||
description: str = ''
|
||||
@@ -148,7 +149,7 @@ class TimePeriod(StrEnum):
|
||||
|
||||
|
||||
@dataclass
|
||||
class Time(JsonSchemaMixin, Replaceable):
|
||||
class Time(dbtClassMixin, Replaceable):
|
||||
count: int
|
||||
period: TimePeriod
|
||||
|
||||
@@ -158,19 +159,14 @@ class Time(JsonSchemaMixin, Replaceable):
|
||||
return actual_age > difference
|
||||
|
||||
|
||||
class FreshnessStatus(StrEnum):
|
||||
Pass = 'pass'
|
||||
Warn = 'warn'
|
||||
Error = 'error'
|
||||
|
||||
|
||||
@dataclass
|
||||
class FreshnessThreshold(JsonSchemaMixin, Mergeable):
|
||||
class FreshnessThreshold(dbtClassMixin, Mergeable):
|
||||
warn_after: Optional[Time] = None
|
||||
error_after: Optional[Time] = None
|
||||
filter: Optional[str] = None
|
||||
|
||||
def status(self, age: float) -> FreshnessStatus:
|
||||
def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus":
|
||||
from dbt.contracts.results import FreshnessStatus
|
||||
if self.error_after and self.error_after.exceeded(age):
|
||||
return FreshnessStatus.Error
|
||||
elif self.warn_after and self.warn_after.exceeded(age):
|
||||
@@ -185,7 +181,7 @@ class FreshnessThreshold(JsonSchemaMixin, Mergeable):
|
||||
@dataclass
|
||||
class AdditionalPropertiesAllowed(
|
||||
AdditionalPropertiesMixin,
|
||||
ExtensibleJsonSchemaMixin
|
||||
ExtensibleDbtClassMixin
|
||||
):
|
||||
_extra: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@@ -217,7 +213,7 @@ class ExternalTable(AdditionalPropertiesAllowed, Mergeable):
|
||||
|
||||
|
||||
@dataclass
|
||||
class Quoting(JsonSchemaMixin, Mergeable):
|
||||
class Quoting(dbtClassMixin, Mergeable):
|
||||
database: Optional[bool] = None
|
||||
schema: Optional[bool] = None
|
||||
identifier: Optional[bool] = None
|
||||
@@ -235,15 +231,15 @@ class UnparsedSourceTableDefinition(HasColumnTests, HasTests):
|
||||
external: Optional[ExternalTable] = None
|
||||
tags: List[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self, omit_none=True, validate=False):
|
||||
result = super().to_dict(omit_none=omit_none, validate=validate)
|
||||
if omit_none and self.freshness is None:
|
||||
result['freshness'] = None
|
||||
return result
|
||||
def __post_serialize__(self, dct):
|
||||
dct = super().__post_serialize__(dct)
|
||||
if 'freshness' not in dct and self.freshness is None:
|
||||
dct['freshness'] = None
|
||||
return dct
|
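The to_dict overrides become __post_serialize__ hooks: mashumaro hands the hook the already-serialized dict, and the hook re-adds an explicit 'freshness': None so consumers can tell "explicitly unset" from "absent". A rough round-trip with a toy class; the class, field names, and expected output are assumptions for illustration only:

from dataclasses import dataclass
from typing import Any, Dict, Optional
from dbt.dataclass_schema import dbtClassMixin

@dataclass
class TableStub(dbtClassMixin):
    name: str
    freshness: Optional[Dict[str, Any]] = None

    def __post_serialize__(self, dct):
        dct = super().__post_serialize__(dct)
        # omit_none=True drops None-valued fields, so put freshness back as an explicit null
        if 'freshness' not in dct and self.freshness is None:
            dct['freshness'] = None
        return dct

TableStub(name='orders').to_dict(omit_none=True)
# expected shape: {'name': 'orders', 'freshness': None}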
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable):
|
||||
class UnparsedSourceDefinition(dbtClassMixin, Replaceable):
|
||||
name: str
|
||||
description: str = ''
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
@@ -262,15 +258,15 @@ class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable):
|
||||
def yaml_key(self) -> 'str':
|
||||
return 'sources'
|
||||
|
||||
def to_dict(self, omit_none=True, validate=False):
|
||||
result = super().to_dict(omit_none=omit_none, validate=validate)
|
||||
if omit_none and self.freshness is None:
|
||||
result['freshness'] = None
|
||||
return result
|
||||
def __post_serialize__(self, dct):
|
||||
dct = super().__post_serialize__(dct)
|
||||
if 'freshness' not in dct and self.freshness is None:
|
||||
dct['freshness'] = None
|
||||
return dct
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceTablePatch(JsonSchemaMixin):
|
||||
class SourceTablePatch(dbtClassMixin):
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
meta: Optional[Dict[str, Any]] = None
|
||||
@@ -301,7 +297,7 @@ class SourceTablePatch(JsonSchemaMixin):
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourcePatch(JsonSchemaMixin, Replaceable):
|
||||
class SourcePatch(dbtClassMixin, Replaceable):
|
||||
name: str = field(
|
||||
metadata=dict(description='The name of the source to override'),
|
||||
)
|
||||
@@ -345,7 +341,7 @@ class SourcePatch(JsonSchemaMixin, Replaceable):
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedDocumentation(JsonSchemaMixin, Replaceable):
|
||||
class UnparsedDocumentation(dbtClassMixin, Replaceable):
|
||||
package_name: str
|
||||
root_path: str
|
||||
path: str
|
||||
@@ -405,17 +401,17 @@ class MaturityType(StrEnum):
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExposureOwner(JsonSchemaMixin, Replaceable):
|
||||
class ExposureOwner(dbtClassMixin, Replaceable):
|
||||
email: str
|
||||
name: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedExposure(JsonSchemaMixin, Replaceable):
|
||||
class UnparsedExposure(dbtClassMixin, Replaceable):
|
||||
name: str
|
||||
type: ExposureType
|
||||
owner: ExposureOwner
|
||||
description: str = ''
|
||||
maturity: Optional[MaturityType] = None
|
||||
url: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
depends_on: List[str] = field(default_factory=list)
|
||||
|
||||
@@ -4,25 +4,39 @@ from dbt.helper_types import NoValue
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
from dbt import tracking
|
||||
from dbt import ui
|
||||
|
||||
from hologram import JsonSchemaMixin, ValidationError
|
||||
from hologram.helpers import HyphenatedJsonSchemaMixin, register_pattern, \
|
||||
ExtensibleJsonSchemaMixin
|
||||
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, ValidationError,
|
||||
HyphenatedDbtClassMixin,
|
||||
ExtensibleDbtClassMixin,
|
||||
register_pattern, ValidatedStringMixin
|
||||
)
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List, Dict, Union, Any, NewType
|
||||
from typing import Optional, List, Dict, Union, Any
|
||||
from mashumaro.types import SerializableType
|
||||
|
||||
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
|
||||
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
|
||||
DEFAULT_SEND_ANONYMOUS_USAGE_STATS = True
|
||||
|
||||
|
||||
Name = NewType('Name', str)
|
||||
class Name(ValidatedStringMixin):
|
||||
ValidationRegex = r'^[^\d\W]\w*$'
|
||||
|
||||
|
||||
register_pattern(Name, r'^[^\d\W]\w*$')
|
||||
|
||||
|
||||
class SemverString(str, SerializableType):
|
||||
def _serialize(self) -> str:
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def _deserialize(cls, value: str) -> 'SemverString':
|
||||
return SemverString(value)
|
||||
|
||||
|
||||
# this does not support the full semver (does not allow a trailing -fooXYZ) and
|
||||
# is not restrictive enough for full semver (allows '1.0'). But it's like
|
||||
# 'semver lite'.
|
||||
SemverString = NewType('SemverString', str)
|
||||
register_pattern(
|
||||
SemverString,
|
||||
r'^(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)(\.(?:0|[1-9]\d*))?$',
|
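Name and SemverString stop being NewTypes with registered jsonschema patterns and become string subclasses that mashumaro can serialize directly, with Name validating itself against its regex at deserialization time. A hedged usage sketch; the import path is an assumption based on the file this hunk edits:

from dbt.contracts.project import Name        # assumed location
from dbt.dataclass_schema import ValidationError

Name.validate('jaffle_shop')      # matches ^[^\d\W]\w*$, so no error
try:
    Name.validate('1_bad_name')   # leading digit fails the regex
except ValidationError as exc:
    print(exc)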
||||
@@ -30,15 +44,15 @@ register_pattern(
|
||||
|
||||
|
||||
@dataclass
|
||||
class Quoting(JsonSchemaMixin, Mergeable):
|
||||
identifier: Optional[bool]
|
||||
schema: Optional[bool]
|
||||
database: Optional[bool]
|
||||
project: Optional[bool]
|
||||
class Quoting(dbtClassMixin, Mergeable):
|
||||
schema: Optional[bool] = None
|
||||
database: Optional[bool] = None
|
||||
project: Optional[bool] = None
|
||||
identifier: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Package(Replaceable, HyphenatedJsonSchemaMixin):
|
||||
class Package(Replaceable, HyphenatedDbtClassMixin):
|
||||
pass
|
||||
|
||||
|
||||
@@ -54,7 +68,7 @@ RawVersion = Union[str, float]
|
||||
@dataclass
|
||||
class GitPackage(Package):
|
||||
git: str
|
||||
revision: Optional[RawVersion]
|
||||
revision: Optional[RawVersion] = None
|
||||
warn_unpinned: Optional[bool] = None
|
||||
|
||||
def get_revisions(self) -> List[str]:
|
||||
@@ -80,7 +94,7 @@ PackageSpec = Union[LocalPackage, GitPackage, RegistryPackage]
|
||||
|
||||
|
||||
@dataclass
|
||||
class PackageConfig(JsonSchemaMixin, Replaceable):
|
||||
class PackageConfig(dbtClassMixin, Replaceable):
|
||||
packages: List[PackageSpec]
|
||||
|
||||
|
||||
@@ -96,13 +110,13 @@ class ProjectPackageMetadata:
|
||||
|
||||
|
||||
@dataclass
|
||||
class Downloads(ExtensibleJsonSchemaMixin, Replaceable):
|
||||
class Downloads(ExtensibleDbtClassMixin, Replaceable):
|
||||
tarball: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class RegistryPackageMetadata(
|
||||
ExtensibleJsonSchemaMixin,
|
||||
ExtensibleDbtClassMixin,
|
||||
ProjectPackageMetadata,
|
||||
):
|
||||
downloads: Downloads
|
||||
@@ -142,6 +156,7 @@ BANNED_PROJECT_NAMES = {
|
||||
'sql',
|
||||
'sql_now',
|
||||
'store_result',
|
||||
'store_raw_result',
|
||||
'target',
|
||||
'this',
|
||||
'tojson',
|
||||
@@ -153,7 +168,7 @@ BANNED_PROJECT_NAMES = {
|
||||
|
||||
|
||||
@dataclass
|
||||
class Project(HyphenatedJsonSchemaMixin, Replaceable):
|
||||
class Project(HyphenatedDbtClassMixin, Replaceable):
|
||||
name: Name
|
||||
version: Union[SemverString, float]
|
||||
config_version: int
|
||||
@@ -180,6 +195,7 @@ class Project(HyphenatedJsonSchemaMixin, Replaceable):
|
||||
snapshots: Dict[str, Any] = field(default_factory=dict)
|
||||
analyses: Dict[str, Any] = field(default_factory=dict)
|
||||
sources: Dict[str, Any] = field(default_factory=dict)
|
||||
tests: Dict[str, Any] = field(default_factory=dict)
|
||||
vars: Optional[Dict[str, Any]] = field(
|
||||
default=None,
|
||||
metadata=dict(
|
||||
@@ -190,18 +206,16 @@ class Project(HyphenatedJsonSchemaMixin, Replaceable):
|
||||
query_comment: Optional[Union[QueryComment, NoValue, str]] = NoValue()
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data, validate=True) -> 'Project':
|
||||
result = super().from_dict(data, validate=validate)
|
||||
if result.name in BANNED_PROJECT_NAMES:
|
||||
def validate(cls, data):
|
||||
super().validate(data)
|
||||
if data['name'] in BANNED_PROJECT_NAMES:
|
||||
raise ValidationError(
|
||||
f'Invalid project name: {result.name} is a reserved word'
|
||||
f"Invalid project name: {data['name']} is a reserved word"
|
||||
)
|
||||
|
||||
return result
|
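Because from_dict no longer takes a validate flag, the reserved-name check moves into a validate classmethod that inspects the raw dict before deserialization. The same pattern on a deliberately tiny, hypothetical class (not the real Project schema):

from dataclasses import dataclass
from dbt.dataclass_schema import dbtClassMixin, ValidationError

RESERVED = {'target', 'this'}   # stand-in for BANNED_PROJECT_NAMES

@dataclass
class DemoProject(dbtClassMixin):
    name: str

    @classmethod
    def validate(cls, data):
        super().validate(data)           # jsonschema check first
        if data['name'] in RESERVED:     # then the domain rule, on the plain dict
            raise ValidationError(
                f"Invalid project name: {data['name']} is a reserved word"
            )

DemoProject.validate({'name': 'target'})   # raises ValidationError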
||||
|
||||
|
||||
@dataclass
|
||||
class UserConfig(ExtensibleJsonSchemaMixin, Replaceable, UserConfigContract):
|
||||
class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
|
||||
send_anonymous_usage_stats: bool = DEFAULT_SEND_ANONYMOUS_USAGE_STATS
|
||||
use_colors: Optional[bool] = None
|
||||
partial_parse: Optional[bool] = None
|
||||
@@ -221,7 +235,7 @@ class UserConfig(ExtensibleJsonSchemaMixin, Replaceable, UserConfigContract):
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProfileConfig(HyphenatedJsonSchemaMixin, Replaceable):
|
||||
class ProfileConfig(HyphenatedDbtClassMixin, Replaceable):
|
||||
profile_name: str = field(metadata={'preserve_underscore': True})
|
||||
target_name: str = field(metadata={'preserve_underscore': True})
|
||||
config: UserConfig
|
||||
@@ -232,10 +246,10 @@ class ProfileConfig(HyphenatedJsonSchemaMixin, Replaceable):
|
||||
|
||||
@dataclass
|
||||
class ConfiguredQuoting(Quoting, Replaceable):
|
||||
identifier: bool
|
||||
schema: bool
|
||||
database: Optional[bool]
|
||||
project: Optional[bool]
|
||||
identifier: bool = True
|
||||
schema: bool = True
|
||||
database: Optional[bool] = None
|
||||
project: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -248,5 +262,5 @@ class Configuration(Project, ProfileConfig):
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProjectList(JsonSchemaMixin):
|
||||
class ProjectList(dbtClassMixin):
|
||||
projects: Dict[str, Project]
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass, fields
|
||||
from typing import (
|
||||
Optional, TypeVar, Generic, Dict,
|
||||
Optional, Dict,
|
||||
)
|
||||
from typing_extensions import Protocol
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import StrEnum
|
||||
from dbt.dataclass_schema import dbtClassMixin, StrEnum
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt.contracts.util import Replaceable
|
||||
@@ -32,7 +31,7 @@ class HasQuoting(Protocol):
|
||||
quoting: Dict[str, bool]
|
||||
|
||||
|
||||
class FakeAPIObject(JsonSchemaMixin, Replaceable, Mapping):
|
||||
class FakeAPIObject(dbtClassMixin, Replaceable, Mapping):
|
||||
# override the mapping truthiness, len is always >1
|
||||
def __bool__(self):
|
||||
return True
|
||||
@@ -53,21 +52,18 @@ class FakeAPIObject(JsonSchemaMixin, Replaceable, Mapping):
|
||||
return len(fields(self.__class__))
|
||||
|
||||
def incorporate(self, **kwargs):
|
||||
value = self.to_dict()
|
||||
value = self.to_dict(omit_none=True)
|
||||
value = deep_merge(value, kwargs)
|
||||
return self.from_dict(value)
|
||||
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ComponentObject(FakeAPIObject, Generic[T]):
|
||||
database: T
|
||||
schema: T
|
||||
identifier: T
|
||||
class Policy(FakeAPIObject):
|
||||
database: bool = True
|
||||
schema: bool = True
|
||||
identifier: bool = True
|
||||
|
||||
def get_part(self, key: ComponentName) -> T:
|
||||
def get_part(self, key: ComponentName) -> bool:
|
||||
if key == ComponentName.Database:
|
||||
return self.database
|
||||
elif key == ComponentName.Schema:
|
||||
@@ -80,25 +76,18 @@ class _ComponentObject(FakeAPIObject, Generic[T]):
|
||||
.format(key, list(ComponentName))
|
||||
)
|
||||
|
||||
def replace_dict(self, dct: Dict[ComponentName, T]):
|
||||
kwargs: Dict[str, T] = {}
|
||||
def replace_dict(self, dct: Dict[ComponentName, bool]):
|
||||
kwargs: Dict[str, bool] = {}
|
||||
for k, v in dct.items():
|
||||
kwargs[str(k)] = v
|
||||
return self.replace(**kwargs)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Policy(_ComponentObject[bool]):
|
||||
database: bool = True
|
||||
schema: bool = True
|
||||
identifier: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class Path(_ComponentObject[Optional[str]]):
|
||||
database: Optional[str]
|
||||
schema: Optional[str]
|
||||
identifier: Optional[str]
|
||||
class Path(FakeAPIObject):
|
||||
database: Optional[str] = None
|
||||
schema: Optional[str] = None
|
||||
identifier: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
# handle pesky jinja2.Undefined sneaking in here and messing up render
|
||||
@@ -120,3 +109,22 @@ class Path(_ComponentObject[Optional[str]]):
|
||||
if part is not None:
|
||||
part = part.lower()
|
||||
return part
|
||||
|
||||
def get_part(self, key: ComponentName) -> Optional[str]:
|
||||
if key == ComponentName.Database:
|
||||
return self.database
|
||||
elif key == ComponentName.Schema:
|
||||
return self.schema
|
||||
elif key == ComponentName.Identifier:
|
||||
return self.identifier
|
||||
else:
|
||||
raise ValueError(
|
||||
'Got a key of {}, expected one of {}'
|
||||
.format(key, list(ComponentName))
|
||||
)
|
||||
|
||||
def replace_dict(self, dct: Dict[ComponentName, str]):
|
||||
kwargs: Dict[str, str] = {}
|
||||
for k, v in dct.items():
|
||||
kwargs[str(k)] = v
|
||||
return self.replace(**kwargs)
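With the generic _ComponentObject[T] removed, Policy and Path carry their own concretely typed get_part and replace_dict. Roughly how the Path variant is used; the import path and values are assumptions for illustration:

from dbt.contracts.relation import Path, ComponentName   # assumed module for this hunk

path = Path(database='analytics', schema='dbt_jaffle', identifier='orders')
path.get_part(ComponentName.Schema)   # -> 'dbt_jaffle'

renamed = path.replace_dict({ComponentName.Identifier: 'customers'})
renamed.identifier                    # -> 'customers' on the returned copy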
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
from dbt.contracts.graph.manifest import CompileResultNode
|
||||
from dbt.contracts.graph.unparsed import (
|
||||
FreshnessStatus, FreshnessThreshold
|
||||
FreshnessThreshold
|
||||
)
|
||||
from dbt.contracts.graph.parsed import ParsedSourceDefinition
|
||||
from dbt.contracts.util import (
|
||||
BaseArtifactMetadata,
|
||||
ArtifactMixin,
|
||||
Writable,
|
||||
VersionedSchema,
|
||||
Replaceable,
|
||||
schema_version,
|
||||
@@ -18,18 +17,21 @@ from dbt.logger import (
|
||||
GLOBAL_LOGGER as logger,
|
||||
)
|
||||
from dbt.utils import lowercase
|
||||
from hologram.helpers import StrEnum
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin, StrEnum
|
||||
|
||||
import agate
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Union, Dict, List, Optional, Any, NamedTuple, Sequence
|
||||
from typing import (
|
||||
Union, Dict, List, Optional, Any, NamedTuple, Sequence,
|
||||
)
|
||||
|
||||
from dbt.clients.system import write_json
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimingInfo(JsonSchemaMixin):
|
||||
class TimingInfo(dbtClassMixin):
|
||||
name: str
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
@@ -55,50 +57,73 @@ class collect_timing_info:
|
||||
logger.debug('finished collecting timing info')
|
||||
|
||||
|
||||
class NodeStatus(StrEnum):
|
||||
Success = "success"
|
||||
Error = "error"
|
||||
Fail = "fail"
|
||||
Warn = "warn"
|
||||
Skipped = "skipped"
|
||||
Pass = "pass"
|
||||
RuntimeErr = "runtime error"
|
||||
|
||||
|
||||
class RunStatus(StrEnum):
|
||||
Success = NodeStatus.Success
|
||||
Error = NodeStatus.Error
|
||||
Skipped = NodeStatus.Skipped
|
||||
|
||||
|
||||
class TestStatus(StrEnum):
|
||||
Pass = NodeStatus.Pass
|
||||
Error = NodeStatus.Error
|
||||
Fail = NodeStatus.Fail
|
||||
Warn = NodeStatus.Warn
|
||||
|
||||
|
||||
class FreshnessStatus(StrEnum):
|
||||
Pass = NodeStatus.Pass
|
||||
Warn = NodeStatus.Warn
|
||||
Error = NodeStatus.Error
|
||||
RuntimeErr = NodeStatus.RuntimeErr
|
||||
|
||||
|
||||
@dataclass
|
||||
class BaseResult(JsonSchemaMixin):
|
||||
class BaseResult(dbtClassMixin):
|
||||
status: Union[RunStatus, TestStatus, FreshnessStatus]
|
||||
timing: List[TimingInfo]
|
||||
thread_id: str
|
||||
execution_time: float
|
||||
adapter_response: Dict[str, Any]
|
||||
message: Optional[Union[str, int]]
|
||||
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
if 'message' not in data:
|
||||
data['message'] = None
|
||||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
class NodeResult(BaseResult):
|
||||
node: CompileResultNode
|
||||
error: Optional[str] = None
|
||||
status: Union[None, str, int, bool] = None
|
||||
execution_time: Union[str, int] = 0
|
||||
thread_id: Optional[str] = None
|
||||
timing: List[TimingInfo] = field(default_factory=list)
|
||||
fail: Optional[bool] = None
|
||||
warn: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class PartialResult(BaseResult, Writable):
|
||||
pass
|
||||
|
||||
# if the result got to the point where it could be skipped/failed, we would
|
||||
# be returning a real result, not a partial.
|
||||
@property
|
||||
def skipped(self):
|
||||
return False
|
||||
|
||||
|
||||
@dataclass
|
||||
class WritableRunModelResult(BaseResult, Writable):
|
||||
skip: bool = False
|
||||
class RunResult(NodeResult):
|
||||
agate_table: Optional[agate.Table] = field(
|
||||
default=None, metadata={
|
||||
'serialize': lambda x: None, 'deserialize': lambda x: None
|
||||
}
|
||||
)
|
||||
|
||||
@property
|
||||
def skipped(self):
|
||||
return self.skip
|
||||
return self.status == RunStatus.Skipped
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunModelResult(WritableRunModelResult):
|
||||
agate_table: Optional[agate.Table] = None
|
||||
|
||||
def to_dict(self, *args, **kwargs):
|
||||
dct = super().to_dict(*args, **kwargs)
|
||||
dct.pop('agate_table', None)
|
||||
return dct
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExecutionResult(JsonSchemaMixin):
|
||||
class ExecutionResult(dbtClassMixin):
|
||||
results: Sequence[BaseResult]
|
||||
elapsed_time: float
|
||||
|
||||
@@ -112,9 +137,6 @@ class ExecutionResult(JsonSchemaMixin):
|
||||
return self.results[idx]
|
||||
|
||||
|
||||
RunResult = Union[PartialResult, WritableRunModelResult]
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunResultsMetadata(BaseArtifactMetadata):
|
||||
dbt_schema_version: str = field(
|
||||
@@ -123,33 +145,69 @@ class RunResultsMetadata(BaseArtifactMetadata):
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('run-results', 1)
|
||||
class RunResultsArtifact(
|
||||
class RunResultOutput(BaseResult):
|
||||
unique_id: str
|
||||
|
||||
|
||||
def process_run_result(result: RunResult) -> RunResultOutput:
|
||||
return RunResultOutput(
|
||||
unique_id=result.node.unique_id,
|
||||
status=result.status,
|
||||
timing=result.timing,
|
||||
thread_id=result.thread_id,
|
||||
execution_time=result.execution_time,
|
||||
message=result.message,
|
||||
adapter_response=result.adapter_response
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunExecutionResult(
|
||||
ExecutionResult,
|
||||
ArtifactMixin,
|
||||
):
|
||||
results: Sequence[RunResult]
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
def write(self, path: str):
|
||||
writable = RunResultsArtifact.from_execution_results(
|
||||
results=self.results,
|
||||
elapsed_time=self.elapsed_time,
|
||||
generated_at=self.generated_at,
|
||||
args=self.args,
|
||||
)
|
||||
writable.write(path)
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('run-results', 1)
|
||||
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
|
||||
results: Sequence[RunResultOutput]
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@classmethod
|
||||
def from_node_results(
|
||||
def from_execution_results(
|
||||
cls,
|
||||
results: Sequence[RunResult],
|
||||
elapsed_time: float,
|
||||
generated_at: datetime,
|
||||
args: Dict,
|
||||
):
|
||||
processed_results = [process_run_result(result) for result in results]
|
||||
meta = RunResultsMetadata(
|
||||
dbt_schema_version=str(cls.dbt_schema_version),
|
||||
generated_at=generated_at,
|
||||
)
|
||||
return cls(
|
||||
metadata=meta,
|
||||
results=results,
|
||||
results=processed_results,
|
||||
elapsed_time=elapsed_time,
|
||||
args=args
|
||||
)
|
||||
|
||||
def write(self, path: str):
|
||||
write_json(path, self.to_dict(omit_none=False))
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunOperationResult(ExecutionResult):
|
||||
@@ -174,7 +232,7 @@ class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):
|
||||
elapsed_time: float,
|
||||
generated_at: datetime,
|
||||
):
|
||||
meta = RunResultsMetadata(
|
||||
meta = RunOperationResultMetadata(
|
||||
dbt_schema_version=str(cls.dbt_schema_version),
|
||||
generated_at=generated_at,
|
||||
)
|
||||
@@ -185,59 +243,56 @@ class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):
|
||||
success=success,
|
||||
)
|
||||
|
||||
# due to issues with typing.Union collapsing subclasses, this can't subclass
|
||||
# PartialResult
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceFreshnessResultMixin(JsonSchemaMixin):
|
||||
class SourceFreshnessResult(NodeResult):
|
||||
node: ParsedSourceDefinition
|
||||
status: FreshnessStatus
|
||||
max_loaded_at: datetime
|
||||
snapshotted_at: datetime
|
||||
age: float
|
||||
|
||||
|
||||
# due to issues with typing.Union collapsing subclasses, this can't subclass
|
||||
# PartialResult
|
||||
@dataclass
|
||||
class SourceFreshnessResult(BaseResult, Writable, SourceFreshnessResultMixin):
|
||||
node: ParsedSourceDefinition
|
||||
status: FreshnessStatus = FreshnessStatus.Pass
|
||||
|
||||
def __post_init__(self):
|
||||
self.fail = self.status == 'error'
|
||||
|
||||
@property
|
||||
def warned(self):
|
||||
return self.status == 'warn'
|
||||
|
||||
@property
|
||||
def skipped(self):
|
||||
return False
|
||||
|
||||
|
||||
def _copykeys(src, keys, **updates):
|
||||
return {k: getattr(src, k) for k in keys}
|
||||
|
||||
|
||||
class FreshnessErrorEnum(StrEnum):
|
||||
runtime_error = 'runtime error'
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceFreshnessRuntimeError(JsonSchemaMixin):
|
||||
class SourceFreshnessRuntimeError(dbtClassMixin):
|
||||
unique_id: str
|
||||
error: str
|
||||
state: FreshnessErrorEnum
|
||||
error: Optional[Union[str, int]]
|
||||
status: FreshnessErrorEnum
|
||||
|
||||
|
||||
@dataclass
|
||||
class SourceFreshnessOutput(JsonSchemaMixin):
|
||||
class SourceFreshnessOutput(dbtClassMixin):
|
||||
unique_id: str
|
||||
max_loaded_at: datetime
|
||||
snapshotted_at: datetime
|
||||
max_loaded_at_time_ago_in_s: float
|
||||
state: FreshnessStatus
|
||||
status: FreshnessStatus
|
||||
criteria: FreshnessThreshold
|
||||
adapter_response: Dict[str, Any]
|
||||
|
||||
|
||||
FreshnessNodeResult = Union[PartialResult, SourceFreshnessResult]
|
||||
@dataclass
|
||||
class PartialSourceFreshnessResult(NodeResult):
|
||||
status: FreshnessStatus
|
||||
|
||||
@property
|
||||
def skipped(self):
|
||||
return False
|
||||
|
||||
|
||||
FreshnessNodeResult = Union[PartialSourceFreshnessResult,
|
||||
SourceFreshnessResult]
|
||||
FreshnessNodeOutput = Union[SourceFreshnessRuntimeError, SourceFreshnessOutput]
|
||||
|
||||
|
||||
@@ -245,11 +300,11 @@ def process_freshness_result(
|
||||
result: FreshnessNodeResult
|
||||
) -> FreshnessNodeOutput:
|
||||
unique_id = result.node.unique_id
|
||||
if result.error is not None:
|
||||
if result.status == FreshnessStatus.RuntimeErr:
|
||||
return SourceFreshnessRuntimeError(
|
||||
unique_id=unique_id,
|
||||
error=result.error,
|
||||
state=FreshnessErrorEnum.runtime_error,
|
||||
error=result.message,
|
||||
status=FreshnessErrorEnum.runtime_error,
|
||||
)
|
||||
|
||||
# we know that this must be a SourceFreshnessResult
|
||||
@@ -271,8 +326,9 @@ def process_freshness_result(
|
||||
max_loaded_at=result.max_loaded_at,
|
||||
snapshotted_at=result.snapshotted_at,
|
||||
max_loaded_at_time_ago_in_s=result.age,
|
||||
state=result.status,
|
||||
status=result.status,
|
||||
criteria=criteria,
|
||||
adapter_response=result.adapter_response
|
||||
)
|
||||
|
||||
|
||||
@@ -330,40 +386,40 @@ CatalogKey = NamedTuple(
|
||||
|
||||
|
||||
@dataclass
|
||||
class StatsItem(JsonSchemaMixin):
|
||||
class StatsItem(dbtClassMixin):
|
||||
id: str
|
||||
label: str
|
||||
value: Primitive
|
||||
description: Optional[str]
|
||||
include: bool
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
StatsDict = Dict[str, StatsItem]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ColumnMetadata(JsonSchemaMixin):
|
||||
class ColumnMetadata(dbtClassMixin):
|
||||
type: str
|
||||
comment: Optional[str]
|
||||
index: int
|
||||
name: str
|
||||
comment: Optional[str] = None
|
||||
|
||||
|
||||
ColumnMap = Dict[str, ColumnMetadata]
|
||||
|
||||
|
||||
@dataclass
|
||||
class TableMetadata(JsonSchemaMixin):
|
||||
class TableMetadata(dbtClassMixin):
|
||||
type: str
|
||||
database: Optional[str]
|
||||
schema: str
|
||||
name: str
|
||||
comment: Optional[str]
|
||||
owner: Optional[str]
|
||||
database: Optional[str] = None
|
||||
comment: Optional[str] = None
|
||||
owner: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class CatalogTable(JsonSchemaMixin, Replaceable):
|
||||
class CatalogTable(dbtClassMixin, Replaceable):
|
||||
metadata: TableMetadata
|
||||
columns: ColumnMap
|
||||
stats: StatsDict
|
||||
@@ -386,12 +442,18 @@ class CatalogMetadata(BaseArtifactMetadata):
|
||||
|
||||
|
||||
@dataclass
|
||||
class CatalogResults(JsonSchemaMixin):
|
||||
class CatalogResults(dbtClassMixin):
|
||||
nodes: Dict[str, CatalogTable]
|
||||
sources: Dict[str, CatalogTable]
|
||||
errors: Optional[List[str]]
|
||||
errors: Optional[List[str]] = None
|
||||
_compile_results: Optional[Any] = None
|
||||
|
||||
def __post_serialize__(self, dct):
|
||||
dct = super().__post_serialize__(dct)
|
||||
if '_compile_results' in dct:
|
||||
del dct['_compile_results']
|
||||
return dct
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('catalog', 1)
|
||||
|
||||
@@ -5,13 +5,12 @@ from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Union, List, Any, Dict, Type, Sequence
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import StrEnum
|
||||
from dbt.dataclass_schema import dbtClassMixin, StrEnum
|
||||
|
||||
from dbt.contracts.graph.compiled import CompileResultNode
|
||||
from dbt.contracts.graph.manifest import WritableManifest
|
||||
from dbt.contracts.results import (
|
||||
TimingInfo,
|
||||
RunResult, RunResultsArtifact, TimingInfo,
|
||||
CatalogArtifact,
|
||||
CatalogResults,
|
||||
ExecutionResult,
|
||||
@@ -19,8 +18,7 @@ from dbt.contracts.results import (
|
||||
FreshnessResult,
|
||||
RunOperationResult,
|
||||
RunOperationResultsArtifact,
|
||||
RunResult,
|
||||
RunResultsArtifact,
|
||||
RunExecutionResult,
|
||||
)
|
||||
from dbt.contracts.util import VersionedSchema, schema_version
|
||||
from dbt.exceptions import InternalException
|
||||
@@ -35,16 +33,25 @@ TaskID = uuid.UUID
|
||||
|
||||
|
||||
@dataclass
|
||||
class RPCParameters(JsonSchemaMixin):
|
||||
timeout: Optional[float]
|
||||
class RPCParameters(dbtClassMixin):
|
||||
task_tags: TaskTags
|
||||
timeout: Optional[float]
|
||||
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data, omit_none=True):
|
||||
data = super().__pre_deserialize__(data)
|
||||
if 'timeout' not in data:
|
||||
data['timeout'] = None
|
||||
if 'task_tags' not in data:
|
||||
data['task_tags'] = None
|
||||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
class RPCExecParameters(RPCParameters):
|
||||
name: str
|
||||
sql: str
|
||||
macros: Optional[str]
|
||||
macros: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -80,6 +87,7 @@ class RPCTestParameters(RPCCompileParameters):
|
||||
data: bool = False
|
||||
schema: bool = False
|
||||
state: Optional[str] = None
|
||||
defer: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -132,7 +140,7 @@ class StatusParameters(RPCParameters):
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCSettings(JsonSchemaMixin):
|
||||
class GCSettings(dbtClassMixin):
|
||||
# start evicting the longest-ago-ended tasks here
|
||||
maxsize: int
|
||||
# start evicting all tasks before now - auto_reap_age when we have this
|
||||
@@ -225,12 +233,12 @@ class RemoteCompileResult(RemoteCompileResultMixin):
|
||||
@dataclass
|
||||
@schema_version('remote-execution-result', 1)
|
||||
class RemoteExecutionResult(ExecutionResult, RemoteResult):
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
results: Sequence[RunResult]
|
||||
args: Dict[str, Any] = field(default_factory=dict)
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
def write(self, path: str):
|
||||
writable = RunResultsArtifact.from_node_results(
|
||||
writable = RunResultsArtifact.from_execution_results(
|
||||
generated_at=self.generated_at,
|
||||
results=self.results,
|
||||
elapsed_time=self.elapsed_time,
|
||||
@@ -241,11 +249,11 @@ class RemoteExecutionResult(ExecutionResult, RemoteResult):
|
||||
@classmethod
|
||||
def from_local_result(
|
||||
cls,
|
||||
base: RunResultsArtifact,
|
||||
base: RunExecutionResult,
|
||||
logs: List[LogMessage],
|
||||
) -> 'RemoteExecutionResult':
|
||||
return cls(
|
||||
generated_at=base.metadata.generated_at,
|
||||
generated_at=base.generated_at,
|
||||
results=base.results,
|
||||
elapsed_time=base.elapsed_time,
|
||||
args=base.args,
|
||||
@@ -254,7 +262,7 @@ class RemoteExecutionResult(ExecutionResult, RemoteResult):
|
||||
|
||||
|
||||
@dataclass
|
||||
class ResultTable(JsonSchemaMixin):
|
||||
class ResultTable(dbtClassMixin):
|
||||
column_names: List[str]
|
||||
rows: List[Any]
|
||||
|
||||
@@ -411,21 +419,31 @@ class TaskHandlerState(StrEnum):
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskTiming(JsonSchemaMixin):
|
||||
class TaskTiming(dbtClassMixin):
|
||||
state: TaskHandlerState
|
||||
start: Optional[datetime]
|
||||
end: Optional[datetime]
|
||||
elapsed: Optional[float]
|
||||
|
||||
# These ought to be defaults but superclass order doesn't
|
||||
# allow that to work
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
for field_name in ('start', 'end', 'elapsed'):
|
||||
if field_name not in data:
|
||||
data[field_name] = None
|
||||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskRow(TaskTiming):
|
||||
task_id: TaskID
|
||||
request_id: Union[str, int]
|
||||
request_source: str
|
||||
method: str
|
||||
timeout: Optional[float]
|
||||
tags: TaskTags
|
||||
request_id: Union[str, int]
|
||||
tags: TaskTags = None
|
||||
timeout: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -451,7 +469,7 @@ class KillResult(RemoteResult):
|
||||
@dataclass
|
||||
@schema_version('remote-manifest-result', 1)
|
||||
class GetManifestResult(RemoteResult):
|
||||
manifest: Optional[WritableManifest]
|
||||
manifest: Optional[WritableManifest] = None
|
||||
|
||||
|
||||
# this is kind of carefuly structured: BlocksManifestTasks is implied by
|
||||
@@ -475,6 +493,16 @@ class PollResult(RemoteResult, TaskTiming):
|
||||
end: Optional[datetime]
|
||||
elapsed: Optional[float]
|
||||
|
||||
# These ought to be defaults but superclass order doesn't
|
||||
# allow that to work
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data):
|
||||
data = super().__pre_deserialize__(data)
|
||||
for field_name in ('start', 'end', 'elapsed'):
|
||||
if field_name not in data:
|
||||
data[field_name] = None
|
||||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
@schema_version('poll-remote-deps-result', 1)
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
from dataclasses import dataclass
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
from typing import List, Dict, Any, Union
|
||||
|
||||
|
||||
@dataclass
|
||||
class SelectorDefinition(JsonSchemaMixin):
|
||||
class SelectorDefinition(dbtClassMixin):
|
||||
name: str
|
||||
definition: Union[str, Dict[str, Any]]
|
||||
description: str = ''
|
||||
|
||||
|
||||
@dataclass
|
||||
class SelectorFile(JsonSchemaMixin):
|
||||
class SelectorFile(dbtClassMixin):
|
||||
selectors: List[SelectorDefinition]
|
||||
version: int = 2
|
||||
|
||||
|
||||
@@ -7,13 +7,12 @@ from typing import (
|
||||
|
||||
from dbt.clients.system import write_json, read_json
|
||||
from dbt.exceptions import (
|
||||
IncompatibleSchemaException,
|
||||
InternalException,
|
||||
RuntimeException,
|
||||
)
|
||||
from dbt.version import __version__
|
||||
from dbt.tracking import get_invocation_id
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
MacroKey = Tuple[str, str]
|
||||
SourceKey = Tuple[str, str]
|
||||
@@ -57,8 +56,10 @@ class Mergeable(Replaceable):
|
||||
|
||||
|
||||
class Writable:
|
||||
def write(self, path: str, omit_none: bool = False):
|
||||
write_json(path, self.to_dict(omit_none=omit_none)) # type: ignore
|
||||
def write(self, path: str):
|
||||
write_json(
|
||||
path, self.to_dict(omit_none=False) # type: ignore
|
||||
)
|
||||
|
||||
|
||||
class AdditionalPropertiesMixin:
|
||||
@@ -69,22 +70,41 @@ class AdditionalPropertiesMixin:
|
||||
"""
|
||||
ADDITIONAL_PROPERTIES = True
|
||||
|
||||
# This takes attributes in the dictionary that are
|
||||
# not in the class definitions and puts them in an
|
||||
# _extra dict in the class
|
||||
@classmethod
|
||||
def from_dict(cls, data, validate=True):
|
||||
self = super().from_dict(data=data, validate=validate)
|
||||
keys = self.to_dict(validate=False, omit_none=False)
|
||||
def __pre_deserialize__(cls, data):
|
||||
# dir() did not work because fields with
|
||||
# metadata settings are not found
|
||||
# The original version of this would create the
|
||||
# object first and then update extra with the
|
||||
# extra keys, but that won't work here, so
|
||||
# we're copying the dict so we don't insert the
|
||||
# _extra in the original data. This also requires
|
||||
# that Mashumaro actually build the '_extra' field
|
||||
cls_keys = cls._get_field_names()
|
||||
new_dict = {}
|
||||
for key, value in data.items():
|
||||
if key not in keys:
|
||||
self.extra[key] = value
|
||||
return self
|
||||
if key not in cls_keys and key != '_extra':
|
||||
if '_extra' not in new_dict:
|
||||
new_dict['_extra'] = {}
|
||||
new_dict['_extra'][key] = value
|
||||
else:
|
||||
new_dict[key] = value
|
||||
data = new_dict
|
||||
data = super().__pre_deserialize__(data)
|
||||
return data
|
||||
|
||||
def to_dict(self, omit_none=True, validate=False):
|
||||
data = super().to_dict(omit_none=omit_none, validate=validate)
|
||||
def __post_serialize__(self, dct):
|
||||
data = super().__post_serialize__(dct)
|
||||
data.update(self.extra)
|
||||
if '_extra' in data:
|
||||
del data['_extra']
|
||||
return data
|
||||
|
||||
def replace(self, **kwargs):
|
||||
dct = self.to_dict(omit_none=False, validate=False)
|
||||
dct = self.to_dict(omit_none=False)
|
||||
dct.update(kwargs)
|
||||
return self.from_dict(dct)
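The net effect of the new __pre_deserialize__/__post_serialize__ pair: unknown keys are parked in _extra on the way in and spliced back into the output dict on the way out, instead of being patched onto an already-built object. A toy round-trip under that reading; the class, keys, and import paths are assumptions:

from dataclasses import dataclass, field
from typing import Any, Dict
from dbt.contracts.util import AdditionalPropertiesMixin   # assumed location (the mixin above)
from dbt.dataclass_schema import ExtensibleDbtClassMixin

@dataclass
class LooseConfig(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
    name: str
    _extra: Dict[str, Any] = field(default_factory=dict)

cfg = LooseConfig.from_dict({'name': 'orders', 'custom_flag': True})
cfg.extra       # {'custom_flag': True}: the unknown key is kept, not dropped
cfg.to_dict()   # {'name': 'orders', 'custom_flag': True}: merged back on output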
|
||||
|
||||
@@ -106,7 +126,8 @@ class Readable:
|
||||
return cls.from_dict(data) # type: ignore
|
||||
|
||||
|
||||
BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/dbt/{name}/v{version}.json'
|
||||
BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
|
||||
SCHEMA_PATH = 'dbt/{name}/v{version}.json'
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -114,12 +135,16 @@ class SchemaVersion:
|
||||
name: str
|
||||
version: int
|
||||
|
||||
def __str__(self) -> str:
|
||||
return BASE_SCHEMAS_URL.format(
|
||||
@property
|
||||
def path(self) -> str:
|
||||
return SCHEMA_PATH.format(
|
||||
name=self.name,
|
||||
version=self.version,
|
||||
version=self.version
|
||||
)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return BASE_SCHEMAS_URL + self.path
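Splitting the constant into BASE_SCHEMAS_URL plus SCHEMA_PATH means __str__ is now just the concatenation of the two, with the relative path available on its own. For example, using the fields of this dataclass (constructor usage assumed):

from dbt.contracts.util import SchemaVersion   # assumed import path

sv = SchemaVersion(name='run-results', version=1)
sv.path    # 'dbt/run-results/v1.json'
str(sv)    # 'https://schemas.getdbt.com/dbt/run-results/v1.json'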
|
||||
|
||||
|
||||
SCHEMA_VERSION_KEY = 'dbt_schema_version'
|
||||
|
||||
@@ -135,7 +160,7 @@ def get_metadata_env() -> Dict[str, str]:
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class BaseArtifactMetadata(JsonSchemaMixin):
|
||||
class BaseArtifactMetadata(dbtClassMixin):
|
||||
dbt_schema_version: str
|
||||
dbt_version: str = __version__
|
||||
generated_at: datetime = dataclasses.field(
|
||||
@@ -158,7 +183,7 @@ def schema_version(name: str, version: int):
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class VersionedSchema(JsonSchemaMixin):
|
||||
class VersionedSchema(dbtClassMixin):
|
||||
dbt_schema_version: ClassVar[SchemaVersion]
|
||||
|
||||
@classmethod
|
||||
@@ -180,18 +205,9 @@ class ArtifactMixin(VersionedSchema, Writable, Readable):
|
||||
metadata: BaseArtifactMetadata
|
||||
|
||||
@classmethod
|
||||
def from_dict(
|
||||
cls: Type[T], data: Dict[str, Any], validate: bool = True
|
||||
) -> T:
|
||||
def validate(cls, data):
|
||||
super().validate(data)
|
||||
if cls.dbt_schema_version is None:
|
||||
raise InternalException(
|
||||
'Cannot call from_dict with no schema version!'
|
||||
)
|
||||
|
||||
if validate:
|
||||
expected = str(cls.dbt_schema_version)
|
||||
found = data.get('metadata', {}).get(SCHEMA_VERSION_KEY)
|
||||
if found != expected:
|
||||
raise IncompatibleSchemaException(expected, found)
|
||||
|
||||
return super().from_dict(data=data, validate=validate)
|
||||
|
||||
core/dbt/dataclass_schema.py (new file, 169 lines)
@@ -0,0 +1,169 @@
|
||||
from typing import (
|
||||
Type, ClassVar, cast,
|
||||
)
|
||||
import re
|
||||
from dataclasses import fields
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
from dateutil.parser import parse
|
||||
|
||||
from hologram import JsonSchemaMixin, FieldEncoder, ValidationError
|
||||
|
||||
# type: ignore
|
||||
from mashumaro import DataClassDictMixin
|
||||
from mashumaro.config import (
|
||||
TO_DICT_ADD_OMIT_NONE_FLAG, BaseConfig as MashBaseConfig
|
||||
)
|
||||
from mashumaro.types import SerializableType, SerializationStrategy
|
||||
|
||||
|
||||
class DateTimeSerialization(SerializationStrategy):
|
||||
def serialize(self, value):
|
||||
out = value.isoformat()
|
||||
# Assume UTC if timezone is missing
|
||||
if value.tzinfo is None:
|
||||
out = out + "Z"
|
||||
return out
|
||||
|
||||
def deserialize(self, value):
|
||||
return (
|
||||
value if isinstance(value, datetime) else parse(cast(str, value))
|
||||
)
|
||||
|
||||
|
||||
# This class pulls in both JsonSchemaMixin from Hologram and
|
||||
# DataClassDictMixin from our fork of Mashumaro. The 'to_dict'
|
||||
# and 'from_dict' methods come from Mashumaro. Building
|
||||
# jsonschemas for every class and the 'validate' method
|
||||
# come from Hologram.
|
||||
class dbtClassMixin(DataClassDictMixin, JsonSchemaMixin):
|
||||
"""Mixin which adds methods to generate a JSON schema and
|
||||
convert to and from JSON encodable dicts with validation
|
||||
against the schema
|
||||
"""
|
||||
|
||||
class Config(MashBaseConfig):
|
||||
code_generation_options = [
|
||||
TO_DICT_ADD_OMIT_NONE_FLAG,
|
||||
]
|
||||
serialization_strategy = {
|
||||
datetime: DateTimeSerialization(),
|
||||
}
|
||||
|
||||
_hyphenated: ClassVar[bool] = False
|
||||
ADDITIONAL_PROPERTIES: ClassVar[bool] = False
|
||||
|
||||
# This is called by the mashumaro to_dict in order to handle
|
||||
# nested classes.
|
||||
# Munges the dict that's returned.
|
||||
def __post_serialize__(self, dct):
|
||||
if self._hyphenated:
|
||||
new_dict = {}
|
||||
for key in dct:
|
||||
if '_' in key:
|
||||
new_key = key.replace('_', '-')
|
||||
new_dict[new_key] = dct[key]
|
||||
else:
|
||||
new_dict[key] = dct[key]
|
||||
dct = new_dict
|
||||
|
||||
return dct
|
||||
|
||||
# This is called by the mashumaro _from_dict method, before
|
||||
# performing the conversion to a dict
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, data):
|
||||
# `data` might not be a dict, e.g. for `query_comment`, which accepts
|
||||
# a dict or a string; only snake-case for dict values.
|
||||
if cls._hyphenated and isinstance(data, dict):
|
||||
new_dict = {}
|
||||
for key in data:
|
||||
if '-' in key:
|
||||
new_key = key.replace('-', '_')
|
||||
new_dict[new_key] = data[key]
|
||||
else:
|
||||
new_dict[key] = data[key]
|
||||
data = new_dict
|
||||
return data
|
||||
|
||||
# This is used in the hologram._encode_field method, which calls
|
||||
# a 'to_dict' method which does not have the same parameters in
|
||||
# hologram and in mashumaro.
|
||||
def _local_to_dict(self, **kwargs):
|
||||
args = {}
|
||||
if 'omit_none' in kwargs:
|
||||
args['omit_none'] = kwargs['omit_none']
|
||||
return self.to_dict(**args)
|
||||
|
||||
|
||||
class ValidatedStringMixin(str, SerializableType):
|
||||
ValidationRegex = ''
|
||||
|
||||
@classmethod
|
||||
def _deserialize(cls, value: str) -> 'ValidatedStringMixin':
|
||||
cls.validate(value)
|
||||
return ValidatedStringMixin(value)
|
||||
|
||||
def _serialize(self) -> str:
|
||||
return str(self)
|
||||
|
||||
@classmethod
|
||||
def validate(cls, value):
|
||||
res = re.match(cls.ValidationRegex, value)
|
||||
|
||||
if res is None:
|
||||
raise ValidationError(f"Invalid value: {value}") # TODO
|
||||
|
||||
|
||||
# These classes must be in this order or it doesn't work
|
||||
class StrEnum(str, SerializableType, Enum):
|
||||
def __str__(self):
|
||||
return self.value
|
||||
|
||||
# https://docs.python.org/3.6/library/enum.html#using-automatic-values
|
||||
def _generate_next_value_(name, *_):
|
||||
return name
|
||||
|
||||
def _serialize(self) -> str:
|
||||
return self.value
|
||||
|
||||
@classmethod
|
||||
def _deserialize(cls, value: str):
|
||||
return cls(value)
|
||||
|
||||
|
||||
class HyphenatedDbtClassMixin(dbtClassMixin):
|
||||
# used by from_dict/to_dict
|
||||
_hyphenated: ClassVar[bool] = True
|
||||
|
||||
# used by jsonschema validation, _get_fields
|
||||
@classmethod
|
||||
def field_mapping(cls):
|
||||
result = {}
|
||||
for field in fields(cls):
|
||||
skip = field.metadata.get("preserve_underscore")
|
||||
if skip:
|
||||
continue
|
||||
|
||||
if "_" in field.name:
|
||||
result[field.name] = field.name.replace("_", "-")
|
||||
return result
|
||||
|
||||
|
||||
class ExtensibleDbtClassMixin(dbtClassMixin):
|
||||
ADDITIONAL_PROPERTIES = True
|
||||
|
||||
|
||||
# This is used by Hologram in jsonschema validation
|
||||
def register_pattern(base_type: Type, pattern: str) -> None:
|
||||
"""base_type should be a typing.NewType that should always have the given
|
||||
regex pattern. That means that its underlying type ('__supertype__') had
|
||||
better be a str!
|
||||
"""
|
||||
|
||||
class PatternEncoder(FieldEncoder):
|
||||
@property
|
||||
def json_schema(self):
|
||||
return {"type": "string", "pattern": pattern}
|
||||
|
||||
dbtClassMixin.register_field_encoders({base_type: PatternEncoder()})
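HyphenatedDbtClassMixin reproduces hologram's hyphenation behaviour through the two hooks above: underscored keys are emitted with hyphens and converted back on input. A small sketch with an invented class:

from dataclasses import dataclass
from dbt.dataclass_schema import HyphenatedDbtClassMixin

@dataclass
class DemoPackage(HyphenatedDbtClassMixin):
    package_name: str
    warn_unpinned: bool = True

DemoPackage(package_name='jaffle_shop').to_dict()
# keys come out hyphenated: {'package-name': 'jaffle_shop', 'warn-unpinned': True}

DemoPackage.from_dict({'package-name': 'jaffle_shop'})
# hyphens are folded back to underscores before mashumaro builds the object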
|
||||
@@ -48,7 +48,18 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
return self.revision
|
||||
|
||||
def nice_version_name(self):
|
||||
return 'revision {}'.format(self.revision)
|
||||
if self.revision == 'HEAD':
|
||||
return 'HEAD (default revision)'
|
||||
else:
|
||||
return 'revision {}'.format(self.revision)
|
||||
|
||||
def unpinned_msg(self):
|
||||
if self.revision == 'HEAD':
|
||||
return 'not pinned, using HEAD (default branch)'
|
||||
elif self.revision in ('main', 'master'):
|
||||
return f'pinned to the "{self.revision}" branch'
|
||||
else:
|
||||
return None
|
||||
|
||||
def _checkout(self):
|
||||
"""Performs a shallow clone of the repository into the downloads
|
||||
@@ -57,7 +68,7 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
the path to the checked out directory."""
|
||||
try:
|
||||
dir_ = git.clone_and_checkout(
|
||||
self.git, get_downloads_path(), branch=self.revision,
|
||||
self.git, get_downloads_path(), revision=self.revision,
|
||||
dirname=self._checkout_name
|
||||
)
|
||||
except ExecutableError as exc:
|
||||
@@ -72,11 +83,12 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
|
||||
|
||||
def _fetch_metadata(self, project, renderer) -> ProjectPackageMetadata:
|
||||
path = self._checkout()
|
||||
if self.revision == 'master' and self.warn_unpinned:
|
||||
|
||||
if self.unpinned_msg() and self.warn_unpinned:
|
||||
warn_or_error(
|
||||
'The git package "{}" is not pinned.\n\tThis can introduce '
|
||||
'The git package "{}" \n\tis {}.\n\tThis can introduce '
|
||||
'breaking changes into your project without warning!\n\nSee {}'
|
||||
.format(self.git, PIN_PACKAGE_URL),
|
||||
.format(self.git, self.unpinned_msg(), PIN_PACKAGE_URL),
|
||||
log_fmt=ui.yellow('WARNING: {}')
|
||||
)
|
||||
loaded = Project.from_project_root(path, renderer)
|
||||
@@ -133,7 +145,7 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
|
||||
def resolved(self) -> GitPinnedPackage:
|
||||
requested = set(self.revisions)
|
||||
if len(requested) == 0:
|
||||
requested = {'master'}
|
||||
requested = {'HEAD'}
|
||||
elif len(requested) > 1:
|
||||
raise_dependency_error(
|
||||
'git dependencies should contain exactly one version. '
|
||||
|
||||
@@ -7,14 +7,14 @@ from dbt.node_types import NodeType
|
||||
from dbt import flags
|
||||
from dbt.ui import line_wrap_message
|
||||
|
||||
import hologram
|
||||
import dbt.dataclass_schema
|
||||
|
||||
|
||||
def validator_error_message(exc):
|
||||
"""Given a hologram.ValidationError (which is basically a
|
||||
"""Given a dbt.dataclass_schema.ValidationError (which is basically a
|
||||
jsonschema.ValidationError), return the relevant parts as a string
|
||||
"""
|
||||
if not isinstance(exc, hologram.ValidationError):
|
||||
if not isinstance(exc, dbt.dataclass_schema.ValidationError):
|
||||
return str(exc)
|
||||
path = "[%s]" % "][".join(map(repr, exc.relative_path))
|
||||
return 'at path {}: {}'.format(path, exc.message)
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import os
|
||||
import multiprocessing
|
||||
if os.name != 'nt':
|
||||
# https://bugs.python.org/issue41567
|
||||
import multiprocessing.popen_spawn_posix # type: ignore
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# special support for CLI argument parsing.
|
||||
import itertools
|
||||
import yaml
|
||||
from dbt.clients.yaml_helper import yaml, Loader, Dumper # noqa: F401
|
||||
|
||||
from typing import (
|
||||
Dict, List, Optional, Tuple, Any, Union
|
||||
@@ -236,7 +236,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
|
||||
)
|
||||
|
||||
# if key isn't a valid method name, this will raise
|
||||
base = SelectionCriteria.from_dict(definition, dct)
|
||||
base = SelectionCriteria.selection_criteria_from_dict(definition, dct)
|
||||
if diff_arg is None:
|
||||
return base
|
||||
else:
|
||||
|
||||
@@ -25,8 +25,8 @@ def get_package_names(nodes):
|
||||
def alert_non_existence(raw_spec, nodes):
|
||||
if len(nodes) == 0:
|
||||
warn_or_error(
|
||||
f"The selector '{str(raw_spec)}' does not match any nodes and will"
|
||||
f" be ignored"
|
||||
f"The selection criterion '{str(raw_spec)}' does not match"
|
||||
f" any nodes"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional
|
||||
|
||||
from hologram.helpers import StrEnum
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
|
||||
from .graph import UniqueId
|
||||
|
||||
|
||||
@@ -102,7 +102,9 @@ class SelectionCriteria:
|
||||
return method_name, method_arguments
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria':
|
||||
def selection_criteria_from_dict(
|
||||
cls, raw: Any, dct: Dict[str, Any]
|
||||
) -> 'SelectionCriteria':
|
||||
if 'value' not in dct:
|
||||
raise RuntimeException(
|
||||
f'Invalid node spec "{raw}" - no search value!'
|
||||
@@ -150,7 +152,7 @@ class SelectionCriteria:
|
||||
# bad spec!
|
||||
raise RuntimeException(f'Invalid selector spec "{raw}"')
|
||||
|
||||
return cls.from_dict(raw, result.groupdict())
|
||||
return cls.selection_criteria_from_dict(raw, result.groupdict())
|
||||
|
||||
|
||||
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
|
||||
|
||||
@@ -2,14 +2,27 @@
|
||||
from dataclasses import dataclass
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import NewType, Tuple, AbstractSet
|
||||
from typing import Tuple, AbstractSet, Union
|
||||
|
||||
from hologram import (
|
||||
FieldEncoder, JsonSchemaMixin, JsonDict, ValidationError
|
||||
from dbt.dataclass_schema import (
|
||||
dbtClassMixin, ValidationError, StrEnum,
|
||||
)
|
||||
from hologram.helpers import StrEnum
|
||||
from hologram import FieldEncoder, JsonDict
|
||||
from mashumaro.types import SerializableType
|
||||
|
||||
Port = NewType('Port', int)
|
||||
|
||||
class Port(int, SerializableType):
|
||||
@classmethod
|
||||
def _deserialize(cls, value: Union[int, str]) -> 'Port':
|
||||
try:
|
||||
value = int(value)
|
||||
except ValueError:
|
||||
raise ValidationError(f'Cannot encode {value} into port number')
|
||||
|
||||
return Port(value)
|
||||
|
||||
def _serialize(self) -> int:
|
||||
return self
|
||||
|
||||
|
||||
class PortEncoder(FieldEncoder):
|
||||
@@ -66,12 +79,12 @@ class NVEnum(StrEnum):
|
||||
|
||||
|
||||
@dataclass
|
||||
class NoValue(JsonSchemaMixin):
|
||||
class NoValue(dbtClassMixin):
|
||||
"""Sometimes, you want a way to say none that isn't None"""
|
||||
novalue: NVEnum = NVEnum.novalue
|
||||
|
||||
|
||||
JsonSchemaMixin.register_field_encoders({
|
||||
dbtClassMixin.register_field_encoders({
|
||||
Port: PortEncoder(),
|
||||
timedelta: TimeDeltaFieldEncoder(),
|
||||
Path: PathEncoder(),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from hologram.helpers import StrEnum
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
import json
|
||||
|
||||
from typing import Union, Dict, Any
|
||||
|
||||
@@ -7,15 +7,15 @@
|
||||
{{ write(sql) }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- set status, res = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}
|
||||
{%- set res, table = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}
|
||||
{%- if name is not none -%}
|
||||
{{ store_result(name, status=status, agate_table=res) }}
|
||||
{{ store_result(name, response=res, agate_table=table) }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- endif -%}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro noop_statement(name=None, status=None, res=None) -%}
|
||||
{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}
|
||||
{%- set sql = caller() -%}
|
||||
|
||||
{%- if name == 'main' -%}
|
||||
@@ -24,7 +24,7 @@
|
||||
{%- endif -%}
|
||||
|
||||
{%- if name is not none -%}
|
||||
{{ store_result(name, status=status, agate_table=res) }}
|
||||
{{ store_raw_result(name, message=message, code=code, rows_affected=rows_affected, agate_table=res) }}
|
||||
{%- endif -%}
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}
|
||||
{%- set predicates = [] if predicates is none else [] + predicates -%}
|
||||
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
|
||||
{%- set update_columns = config.get('merge_update_columns', default = dest_columns | map(attribute="quoted") | list) -%}
|
||||
{%- set sql_header = config.get('sql_header', none) -%}
|
||||
|
||||
{% if unique_key %}
|
||||
@@ -37,8 +38,8 @@
|
||||
|
||||
{% if unique_key %}
|
||||
when matched then update set
|
||||
{% for column in dest_columns -%}
|
||||
{{ adapter.quote(column.name) }} = DBT_INTERNAL_SOURCE.{{ adapter.quote(column.name) }}
|
||||
{% for column_name in update_columns -%}
|
||||
{{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}
|
||||
{%- if not loop.last %}, {%- endif %}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
|
||||
@@ -112,7 +112,7 @@
|
||||
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
|
||||
|
||||
{%- set agate_table = load_agate_table() -%}
|
||||
{%- do store_result('agate_table', status='OK', agate_table=agate_table) -%}
|
||||
{%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
|
||||
|
||||
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
||||
|
||||
@@ -129,11 +129,11 @@
|
||||
{% set create_table_sql = create_csv_table(model, agate_table) %}
|
||||
{% endif %}
|
||||
|
||||
{% set status = 'CREATE' if full_refresh_mode else 'INSERT' %}
|
||||
{% set num_rows = (agate_table.rows | length) %}
|
||||
{% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}
|
||||
{% set rows_affected = (agate_table.rows | length) %}
|
||||
{% set sql = load_csv_rows(model, agate_table) %}
|
||||
|
||||
{% call noop_statement('main', status ~ ' ' ~ num_rows) %}
|
||||
{% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}
|
||||
{{ create_table_sql }};
|
||||
-- dbt seed --
|
||||
{{ sql }}
|
||||
|
||||
@@ -13,13 +13,7 @@
|
||||
|
||||
when matched
|
||||
and DBT_INTERNAL_DEST.dbt_valid_to is null
|
||||
and DBT_INTERNAL_SOURCE.dbt_change_type = 'update'
|
||||
then update
|
||||
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
|
||||
|
||||
when matched
|
||||
and DBT_INTERNAL_DEST.dbt_valid_to is null
|
||||
and DBT_INTERNAL_SOURCE.dbt_change_type = 'delete'
|
||||
and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')
|
||||
then update
|
||||
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
|
||||
|
||||
|
||||
@@ -164,7 +164,11 @@
{%- for col in check_cols -%}
{{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}
or
({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null)
(
(({{ snapshotted_rel }}.{{ col }} is null) and not ({{ current_rel }}.{{ col }} is null))
or
((not {{ snapshotted_rel }}.{{ col }} is null) and ({{ current_rel }}.{{ col }} is null))
)
{%- if not loop.last %} or {% endif -%}
{%- endfor -%}
{%- endif -%}
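The check_cols rewrite above replaces the `(x is null) != (y is null)` comparison with two explicit one-sided null checks. A quick Python sketch (hypothetical values, not dbt code) of why both predicates flag the same null transitions:

from itertools import product

def xor_form(a, b):
    # original predicate: (a is null) != (b is null)
    return (a is None) != (b is None)

def expanded_form(a, b):
    # rewritten predicate: two explicit one-sided null checks joined by OR
    return ((a is None) and not (b is None)) or ((not (a is None)) and (b is None))

# The two forms agree for every null / not-null combination.
for a, b in product([None, 1], repeat=2):
    assert xor_form(a, b) == expanded_form(a, b)
print("forms are equivalent")

The expanded form is wordier but presumably easier for warehouses that cannot compare the boolean results of IS NULL expressions directly.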
@@ -0,0 +1,10 @@
{%- materialization test, default -%}

{% call statement('main', fetch_result=True) -%}
select count(*) as validation_errors
from (
{{ sql }}
) _dbt_internal_test
{%- endcall %}

{%- endmaterialization -%}
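The new test materialization above wraps the compiled test body in a count(*) subquery so the adapter fetches a single failure count. A rough sqlite3 illustration of that wrapping (table and query are invented for the example):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("create table orders (id integer, amount integer)")
conn.executemany("insert into orders values (?, ?)", [(1, 10), (2, None), (3, 5)])

# A "test" query returns the offending rows; the materialization only needs how many there are.
test_sql = "select * from orders where amount is null"
wrapped = f"select count(*) as validation_errors from ({test_sql}) _dbt_internal_test"

validation_errors = conn.execute(wrapped).fetchone()[0]
print(validation_errors)  # 1 failing row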
@@ -36,8 +36,7 @@ from validation_errors

{% endmacro %}

{% macro test_accepted_values(model, values) %}
{% test accepted_values(model, values) %}
{% set macro = adapter.dispatch('test_accepted_values') %}
{{ macro(model, values, **kwargs) }}
{% endmacro %}
{% endtest %}
@@ -9,9 +9,7 @@ where {{ column_name }} is null

{% endmacro %}

{% macro test_not_null(model) %}
{% test not_null(model) %}
{% set macro = adapter.dispatch('test_not_null') %}
{{ macro(model, **kwargs) }}
{% endmacro %}
{% endtest %}
@@ -18,7 +18,7 @@ where child.id is not null

{% macro test_relationships(model, to, field) %}
{% test relationships(model, to, field) %}
{% set macro = adapter.dispatch('test_relationships') %}
{{ macro(model, to, field, **kwargs) }}
{% endmacro %}
{% endtest %}
@@ -19,7 +19,7 @@ from (
{% endmacro %}

{% macro test_unique(model) %}
{% test unique(model) %}
{% set macro = adapter.dispatch('test_unique') %}
{{ macro(model, **kwargs) }}
{% endmacro %}
{% endtest %}
File diff suppressed because one or more lines are too long
@@ -13,7 +13,7 @@ from typing import Optional, List, ContextManager, Callable, Dict, Any, Set

import colorama
import logbook
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin

# Colorama needs some help on windows because we're using logger.info
# intead of print(). If the Windows env doesn't have a TERM var set,
@@ -45,11 +45,10 @@ DEBUG_LOG_FORMAT = (

ExceptionInformation = str
Extras = Dict[str, Any]

@dataclass
class LogMessage(JsonSchemaMixin):
class LogMessage(dbtClassMixin):
timestamp: datetime
message: str
channel: str
@@ -57,7 +56,7 @@ class LogMessage(JsonSchemaMixin):
levelname: str
thread_name: str
process: int
extra: Optional[Extras] = None
extra: Optional[Dict[str, Any]] = None
exc_info: Optional[ExceptionInformation] = None

@classmethod
@@ -96,7 +95,8 @@ class JsonFormatter(LogMessageFormatter):
# utils imports exceptions which imports logger...
import dbt.utils
log_message = super().__call__(record, handler)
return json.dumps(log_message.to_dict(), cls=dbt.utils.JSONEncoder)
dct = log_message.to_dict(omit_none=True)
return json.dumps(dct, cls=dbt.utils.JSONEncoder)

class FormatterMixin:
@@ -128,6 +128,7 @@ class OutputHandler(logbook.StreamHandler, FormatterMixin):
The `format_string` parameter only changes the default text output, not
debug mode or json.
"""

def __init__(
self,
stream,
@@ -215,13 +216,14 @@ class TextOnly(logbook.Processor):

class TimingProcessor(logbook.Processor):
def __init__(self, timing_info: Optional[JsonSchemaMixin] = None):
def __init__(self, timing_info: Optional[dbtClassMixin] = None):
self.timing_info = timing_info
super().__init__()

def process(self, record):
if self.timing_info is not None:
record.extra['timing_info'] = self.timing_info.to_dict()
record.extra['timing_info'] = self.timing_info.to_dict(
omit_none=True)

class DbtProcessState(logbook.Processor):
@@ -350,6 +352,7 @@ def make_log_dir_if_missing(log_dir):
class DebugWarnings(logbook.compat.redirected_warnings):
"""Log warnings, except send them to 'debug' instead of 'warning' level.
"""

def make_record(self, message, exception, filename, lineno):
rv = super().make_record(message, exception, filename, lineno)
rv.level = logbook.DEBUG
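The logger hunks above swap hologram's JsonSchemaMixin for dbtClassMixin and start passing omit_none=True when serializing log records. A hand-rolled sketch of what that omit_none behaviour amounts to (not the real dbtClassMixin API, just the idea of dropping unset fields before JSON encoding):

import json
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import Any, Dict, Optional


@dataclass
class LogMessageSketch:
    timestamp: str
    message: str
    levelname: str
    extra: Optional[Dict[str, Any]] = None
    exc_info: Optional[str] = None

    def to_dict(self, omit_none: bool = False) -> Dict[str, Any]:
        dct = asdict(self)
        if omit_none:
            # Mirrors the omit_none=True calls in the diff: drop unset fields
            # so they don't appear as "null" in the JSON log line.
            dct = {k: v for k, v in dct.items() if v is not None}
        return dct


msg = LogMessageSketch(timestamp=datetime.now().isoformat(),
                       message="Running with dbt", levelname="INFO")
print(json.dumps(msg.to_dict(omit_none=True)))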
@@ -446,6 +446,21 @@ def _build_snapshot_subparser(subparsers, base_subparser):
return sub

def _add_defer_argument(*subparsers):
for sub in subparsers:
sub.add_optional_argument_inverse(
'--defer',
enable_help='''
If set, defer to the state variable for resolving unselected nodes.
''',
disable_help='''
If set, do not defer to the state variable for resolving unselected
nodes.
''',
default=flags.DEFER_MODE,
)

def _build_run_subparser(subparsers, base_subparser):
run_sub = subparsers.add_parser(
'run',
@@ -463,19 +478,6 @@ def _build_run_subparser(subparsers, base_subparser):
'''
)

# this is a "dbt run"-only thing, for now
run_sub.add_optional_argument_inverse(
'--defer',
enable_help='''
If set, defer to the state variable for resolving unselected nodes.
''',
disable_help='''
If set, do not defer to the state variable for resolving unselected
nodes.
''',
default=flags.DEFER_MODE,
)

run_sub.set_defaults(cls=run_task.RunTask, which='run', rpc_method='run')
return run_sub

@@ -1033,6 +1035,8 @@ def parse_args(args, cls=DBTArgumentParser):
# list_sub sets up its own arguments.
_add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub)
_add_selection_arguments(snapshot_sub, seed_sub, models_name='select')
# --defer
_add_defer_argument(run_sub, test_sub)
# --full-refresh
_add_table_mutability_arguments(run_sub, compile_sub)
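The --defer hunks above move the flag out of the run-only subparser into an _add_defer_argument helper shared by run and test. A plain-argparse sketch of the same deduplication pattern (add_optional_argument_inverse is dbt's own wrapper and is only approximated here with a mutually exclusive group):

import argparse


def add_defer_argument(*subparsers, default=False):
    # Register the same --defer / --no-defer pair on every subparser that
    # should support deferral, instead of repeating the block inline.
    for sub in subparsers:
        group = sub.add_mutually_exclusive_group()
        group.add_argument("--defer", dest="defer", action="store_true",
                           help="Defer to the state variable for unselected nodes.")
        group.add_argument("--no-defer", dest="defer", action="store_false",
                           help="Do not defer to the state variable.")
        sub.set_defaults(defer=default)


parser = argparse.ArgumentParser(prog="dbt-sketch")
subs = parser.add_subparsers(dest="command")
run_sub = subs.add_parser("run")
test_sub = subs.add_parser("test")
add_defer_argument(run_sub, test_sub)

print(parser.parse_args(["test", "--defer"]).defer)  # True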
@@ -1,6 +1,6 @@
from typing import List

from hologram.helpers import StrEnum
from dbt.dataclass_schema import StrEnum

class NodeType(StrEnum):
@@ -46,6 +46,7 @@ class NodeType(StrEnum):
cls.Source,
cls.Macro,
cls.Analysis,
cls.Exposure
]

def pluralize(self) -> str:
@@ -5,12 +5,11 @@ from .docs import DocumentationParser # noqa
from .hooks import HookParser # noqa
from .macros import MacroParser # noqa
from .models import ModelParser # noqa
from .results import ParseResult # noqa
from .schemas import SchemaParser # noqa
from .seeds import SeedParser # noqa
from .snapshots import SnapshotParser # noqa

from . import ( # noqa
analysis, base, data_test, docs, hooks, macros, models, results, schemas,
analysis, base, data_test, docs, hooks, macros, models, schemas,
snapshots
)
@@ -3,17 +3,14 @@ import os
from dbt.contracts.graph.parsed import ParsedAnalysisNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock

class AnalysisParser(SimpleSQLParser[ParsedAnalysisNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.analysis_paths, '.sql'
)

def parse_from_dict(self, dct, validate=True) -> ParsedAnalysisNode:
return ParsedAnalysisNode.from_dict(dct, validate=validate)
if validate:
ParsedAnalysisNode.validate(dct)
return ParsedAnalysisNode.from_dict(dct)

@property
def resource_type(self) -> NodeType:
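The parser diffs in this range all make the same substitution: from_dict(dct, validate=validate) becomes an explicit Class.validate(dct) call followed by Class.from_dict(dct). A toy version of that split, assuming nothing about the real dbtClassMixin internals:

from dataclasses import dataclass


@dataclass
class ParsedNodeSketch:
    name: str
    raw_sql: str

    @classmethod
    def validate(cls, dct: dict) -> None:
        # Stand-in for schema validation: fail fast on missing keys.
        missing = {"name", "raw_sql"} - dct.keys()
        if missing:
            raise ValueError(f"missing fields: {sorted(missing)}")

    @classmethod
    def from_dict(cls, dct: dict) -> "ParsedNodeSketch":
        return cls(name=dct["name"], raw_sql=dct["raw_sql"])


def parse_from_dict(dct: dict, validate: bool = True) -> ParsedNodeSketch:
    # Mirrors the new two-step flow: validation is optional and separate
    # from construction, instead of being a from_dict() keyword.
    if validate:
        ParsedNodeSketch.validate(dct)
    return ParsedNodeSketch.from_dict(dct)


print(parse_from_dict({"name": "my_model", "raw_sql": "select 1"}))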
@@ -2,14 +2,13 @@ import abc
|
||||
import itertools
|
||||
import os
|
||||
from typing import (
|
||||
List, Dict, Any, Iterable, Generic, TypeVar
|
||||
List, Dict, Any, Generic, TypeVar
|
||||
)
|
||||
|
||||
from hologram import ValidationError
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
|
||||
from dbt import utils
|
||||
from dbt.clients.jinja import MacroGenerator
|
||||
from dbt.clients.system import load_file_contents
|
||||
from dbt.context.providers import (
|
||||
generate_parser_model,
|
||||
generate_generate_component_name_macro,
|
||||
@@ -20,18 +19,14 @@ from dbt.config import Project, RuntimeConfig
|
||||
from dbt.context.context_config import (
|
||||
ContextConfig
|
||||
)
|
||||
from dbt.contracts.files import (
|
||||
SourceFile, FilePath, FileHash
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.graph.parsed import HasUniqueID
|
||||
from dbt.contracts.graph.parsed import HasUniqueID, ManifestNodes
|
||||
from dbt.contracts.graph.unparsed import UnparsedNode
|
||||
from dbt.exceptions import (
|
||||
CompilationException, validator_error_message, InternalException
|
||||
)
|
||||
from dbt import hooks
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.results import ParseResult, ManifestNodes
|
||||
from dbt.parser.search import FileBlock
|
||||
|
||||
# internally, the parser may store a less-restrictive type that will be
|
||||
@@ -48,20 +43,9 @@ ConfiguredBlockType = TypeVar('ConfiguredBlockType', bound=FileBlock)
|
||||
|
||||
|
||||
class BaseParser(Generic[FinalValue]):
|
||||
def __init__(self, results: ParseResult, project: Project) -> None:
|
||||
self.results = results
|
||||
def __init__(self, project: Project, manifest: Manifest) -> None:
|
||||
self.project = project
|
||||
# this should be a superset of [x.path for x in self.results.files]
|
||||
# because we fill it via search()
|
||||
self.searched: List[FilePath] = []
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_paths(self) -> Iterable[FilePath]:
|
||||
pass
|
||||
|
||||
def search(self) -> List[FilePath]:
|
||||
self.searched = list(self.get_paths())
|
||||
return self.searched
|
||||
self.manifest = manifest
|
||||
|
||||
@abc.abstractmethod
|
||||
def parse_file(self, block: FileBlock) -> None:
|
||||
@@ -77,38 +61,22 @@ class BaseParser(Generic[FinalValue]):
|
||||
self.project.project_name,
|
||||
resource_name)
|
||||
|
||||
def load_file(
|
||||
self,
|
||||
path: FilePath,
|
||||
*,
|
||||
set_contents: bool = True,
|
||||
) -> SourceFile:
|
||||
file_contents = load_file_contents(path.absolute_path, strip=False)
|
||||
checksum = FileHash.from_contents(file_contents)
|
||||
source_file = SourceFile(path=path, checksum=checksum)
|
||||
if set_contents:
|
||||
source_file.contents = file_contents.strip()
|
||||
else:
|
||||
source_file.contents = ''
|
||||
return source_file
|
||||
|
||||
|
||||
class Parser(BaseParser[FinalValue], Generic[FinalValue]):
|
||||
def __init__(
|
||||
self,
|
||||
results: ParseResult,
|
||||
project: Project,
|
||||
manifest: Manifest,
|
||||
root_project: RuntimeConfig,
|
||||
macro_manifest: Manifest,
|
||||
) -> None:
|
||||
super().__init__(results, project)
|
||||
super().__init__(project, manifest)
|
||||
self.root_project = root_project
|
||||
self.macro_manifest = macro_manifest
|
||||
|
||||
|
||||
class RelationUpdate:
|
||||
def __init__(
|
||||
self, config: RuntimeConfig, manifest: Manifest, component: str
|
||||
self, config: RuntimeConfig, manifest: Manifest,
|
||||
component: str
|
||||
) -> None:
|
||||
macro = manifest.find_generate_macro_by_name(
|
||||
component=component,
|
||||
@@ -141,21 +109,23 @@ class ConfiguredParser(
|
||||
):
|
||||
def __init__(
|
||||
self,
|
||||
results: ParseResult,
|
||||
project: Project,
|
||||
manifest: Manifest,
|
||||
root_project: RuntimeConfig,
|
||||
macro_manifest: Manifest,
|
||||
) -> None:
|
||||
super().__init__(results, project, root_project, macro_manifest)
|
||||
super().__init__(project, manifest, root_project)
|
||||
|
||||
self._update_node_database = RelationUpdate(
|
||||
manifest=macro_manifest, config=root_project, component='database'
|
||||
manifest=manifest, config=root_project,
|
||||
component='database'
|
||||
)
|
||||
self._update_node_schema = RelationUpdate(
|
||||
manifest=macro_manifest, config=root_project, component='schema'
|
||||
manifest=manifest, config=root_project,
|
||||
component='schema'
|
||||
)
|
||||
self._update_node_alias = RelationUpdate(
|
||||
manifest=macro_manifest, config=root_project, component='alias'
|
||||
manifest=manifest, config=root_project,
|
||||
component='alias'
|
||||
)
|
||||
|
||||
@abc.abstractclassmethod
|
||||
@@ -248,11 +218,11 @@ class ConfiguredParser(
|
||||
'raw_sql': block.contents,
|
||||
'unique_id': self.generate_unique_id(name),
|
||||
'config': self.config_dict(config),
|
||||
'checksum': block.file.checksum.to_dict(),
|
||||
'checksum': block.file.checksum.to_dict(omit_none=True),
|
||||
}
|
||||
dct.update(kwargs)
|
||||
try:
|
||||
return self.parse_from_dict(dct)
|
||||
return self.parse_from_dict(dct, validate=True)
|
||||
except ValidationError as exc:
|
||||
msg = validator_error_message(exc)
|
||||
# this is a bit silly, but build an UnparsedNode just for error
|
||||
@@ -269,31 +239,35 @@ class ConfiguredParser(
|
||||
self, parsed_node: IntermediateNode, config: ContextConfig
|
||||
) -> Dict[str, Any]:
|
||||
return generate_parser_model(
|
||||
parsed_node, self.root_project, self.macro_manifest, config
|
||||
parsed_node, self.root_project, self.manifest, config
|
||||
)
|
||||
|
||||
def render_with_context(
|
||||
self, parsed_node: IntermediateNode, config: ContextConfig
|
||||
) -> None:
|
||||
"""Given the parsed node and a ContextConfig to use during parsing,
|
||||
render the node's sql wtih macro capture enabled.
|
||||
# Given the parsed node and a ContextConfig to use during parsing,
|
||||
# render the node's sql wtih macro capture enabled.
|
||||
# Note: this mutates the config object when config calls are rendered.
|
||||
|
||||
Note: this mutates the config object when config() calls are rendered.
|
||||
"""
|
||||
# during parsing, we don't have a connection, but we might need one, so
|
||||
# we have to acquire it.
|
||||
with get_adapter(self.root_project).connection_for(parsed_node):
|
||||
context = self._context_for(parsed_node, config)
|
||||
|
||||
# this goes through the process of rendering, but just throws away
|
||||
# the rendered result. The "macro capture" is the point?
|
||||
get_rendered(
|
||||
parsed_node.raw_sql, context, parsed_node, capture_macros=True
|
||||
)
|
||||
|
||||
# This is taking the original config for the node, converting it to a dict,
|
||||
# updating the config with new config passed in, then re-creating the
|
||||
# config from the dict in the node.
|
||||
def update_parsed_node_config(
|
||||
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
|
||||
) -> None:
|
||||
# Overwrite node config
|
||||
final_config_dict = parsed_node.config.to_dict()
|
||||
final_config_dict = parsed_node.config.to_dict(omit_none=True)
|
||||
final_config_dict.update(config_dict)
|
||||
# re-mangle hooks, in case we got new ones
|
||||
self._mangle_hooks(final_config_dict)
|
||||
@@ -378,9 +352,9 @@ class ConfiguredParser(
|
||||
|
||||
def add_result_node(self, block: FileBlock, node: ManifestNodes):
|
||||
if node.config.enabled:
|
||||
self.results.add_node(block.file, node)
|
||||
self.manifest.add_node(block.file, node)
|
||||
else:
|
||||
self.results.add_disabled(block.file, node)
|
||||
self.manifest.add_disabled(block.file, node)
|
||||
|
||||
def parse_node(self, block: ConfiguredBlockType) -> FinalNode:
|
||||
compiled_path: str = self.get_compiled_path(block)
|
||||
|
||||
@@ -1,18 +1,15 @@
from dbt.contracts.graph.parsed import ParsedDataTestNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock
from dbt.utils import get_pseudo_test_path

class DataTestParser(SimpleSQLParser[ParsedDataTestNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.test_paths, '.sql'
)

def parse_from_dict(self, dct, validate=True) -> ParsedDataTestNode:
return ParsedDataTestNode.from_dict(dct, validate=validate)
if validate:
ParsedDataTestNode.validate(dct)
return ParsedDataTestNode.from_dict(dct)

@property
def resource_type(self) -> NodeType:
@@ -7,7 +7,7 @@ from dbt.contracts.graph.parsed import ParsedDocumentation
from dbt.node_types import NodeType
from dbt.parser.base import Parser
from dbt.parser.search import (
BlockContents, FileBlock, FilesystemSearcher, BlockSearcher
BlockContents, FileBlock, BlockSearcher
)

@@ -15,13 +15,6 @@ SHOULD_PARSE_RE = re.compile(r'{[{%]')

class DocumentationParser(Parser[ParsedDocumentation]):
def get_paths(self):
return FilesystemSearcher(
project=self.project,
relative_dirs=self.project.docs_paths,
extension='.md',
)

@property
def resource_type(self) -> NodeType:
return NodeType.Documentation
@@ -60,6 +53,4 @@ class DocumentationParser(Parser[ParsedDocumentation]):
)
for block in searcher:
for parsed in self.parse_block(block):
self.results.add_doc(file_block.file, parsed)
# mark the file as seen, even if there are no macros in it
self.results.get_file(file_block.file)
self.manifest.add_doc(file_block.file, parsed)
@@ -70,16 +70,19 @@ class HookParser(SimpleParser[HookBlock, ParsedHookNode]):
def transform(self, node):
return node

def get_paths(self) -> List[FilePath]:
# Hooks are only in the dbt_project.yml file for the project
def get_path(self) -> FilePath:
path = FilePath(
project_root=self.project.project_root,
searched_path='.',
relative_path='dbt_project.yml',
)
return [path]
return path

def parse_from_dict(self, dct, validate=True) -> ParsedHookNode:
return ParsedHookNode.from_dict(dct, validate=validate)
if validate:
ParsedHookNode.validate(dct)
return ParsedHookNode.from_dict(dct)

@classmethod
def get_compiled_path(cls, block: HookBlock):
@@ -5,6 +5,7 @@ import jinja2
from dbt.clients import jinja
from dbt.contracts.graph.unparsed import UnparsedMacro
from dbt.contracts.graph.parsed import ParsedMacro
from dbt.contracts.files import FilePath
from dbt.exceptions import CompilationException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
@@ -14,12 +15,14 @@ from dbt.utils import MACRO_PREFIX

class MacroParser(BaseParser[ParsedMacro]):
def get_paths(self):
return FilesystemSearcher(
# This is only used when creating a MacroManifest separate
# from the normal parsing flow.
def get_paths(self) -> List[FilePath]:
return list(FilesystemSearcher(
project=self.project,
relative_dirs=self.project.macro_paths,
extension='.sql',
)
))

@property
def resource_type(self) -> NodeType:
@@ -53,7 +56,7 @@ class MacroParser(BaseParser[ParsedMacro]):
t for t in
jinja.extract_toplevel_blocks(
base_node.raw_sql,
allowed_blocks={'macro', 'materialization'},
allowed_blocks={'macro', 'materialization', 'test'},
collect_raw_data=False,
)
if isinstance(t, jinja.BlockTag)
@@ -90,7 +93,7 @@ class MacroParser(BaseParser[ParsedMacro]):

def parse_file(self, block: FileBlock):
# mark the file as seen, even if there are no macros in it
self.results.get_file(block.file)
self.manifest.get_file(block.file)
source_file = block.file

original_file_path = source_file.path.original_file_path
@@ -108,4 +111,4 @@ class MacroParser(BaseParser[ParsedMacro]):
)

for node in self.parse_unparsed_macros(base_node):
self.results.add_macro(block.file, node)
self.manifest.add_macro(block.file, node)
@@ -1,29 +1,40 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
import os
|
||||
import pickle
|
||||
from typing import (
|
||||
Dict, Optional, Mapping, Callable, Any, List, Type, Union, MutableMapping
|
||||
Dict, Optional, Mapping, Callable, Any, List, Type, Union
|
||||
)
|
||||
import time
|
||||
|
||||
import dbt.exceptions
|
||||
import dbt.tracking
|
||||
import dbt.flags as flags
|
||||
|
||||
from dbt.adapters.factory import (
|
||||
get_adapter,
|
||||
get_relation_class_by_name,
|
||||
get_adapter_package_names,
|
||||
)
|
||||
from dbt.helper_types import PathSet
|
||||
from dbt.logger import GLOBAL_LOGGER as logger, DbtProcessState
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.clients.jinja import get_rendered
|
||||
from dbt.clients.jinja import get_rendered, statically_extract_macro_calls
|
||||
from dbt.clients.system import make_directory
|
||||
from dbt.config import Project, RuntimeConfig
|
||||
from dbt.context.docs import generate_runtime_docs
|
||||
from dbt.contracts.files import FilePath, FileHash
|
||||
from dbt.context.macro_resolver import MacroResolver
|
||||
from dbt.context.base import generate_base_context
|
||||
from dbt.contracts.files import FileHash, ParseFileType
|
||||
from dbt.parser.read_files import read_files, load_source_file
|
||||
from dbt.contracts.graph.compiled import ManifestNode
|
||||
from dbt.contracts.graph.manifest import Manifest, Disabled
|
||||
from dbt.contracts.graph.manifest import (
|
||||
Manifest, Disabled, MacroManifest, ManifestStateCheck
|
||||
)
|
||||
from dbt.contracts.graph.parsed import (
|
||||
ParsedSourceDefinition, ParsedNode, ParsedMacro, ColumnInfo, ParsedExposure
|
||||
)
|
||||
from dbt.contracts.util import Writable
|
||||
from dbt.exceptions import (
|
||||
ref_target_not_found,
|
||||
get_target_not_found_or_disabled_msg,
|
||||
@@ -38,7 +49,6 @@ from dbt.parser.docs import DocumentationParser
|
||||
from dbt.parser.hooks import HookParser
|
||||
from dbt.parser.macros import MacroParser
|
||||
from dbt.parser.models import ModelParser
|
||||
from dbt.parser.results import ParseResult
|
||||
from dbt.parser.schemas import SchemaParser
|
||||
from dbt.parser.search import FileBlock
|
||||
from dbt.parser.seeds import SeedParser
|
||||
@@ -47,60 +57,47 @@ from dbt.parser.sources import patch_sources
|
||||
from dbt.ui import warning_tag
|
||||
from dbt.version import __version__
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle'
|
||||
PARSING_STATE = DbtProcessState('parsing')
|
||||
DEFAULT_PARTIAL_PARSE = False
|
||||
|
||||
|
||||
_parser_types: List[Type[Parser]] = [
|
||||
ModelParser,
|
||||
SnapshotParser,
|
||||
AnalysisParser,
|
||||
DataTestParser,
|
||||
HookParser,
|
||||
SeedParser,
|
||||
DocumentationParser,
|
||||
SchemaParser,
|
||||
]
|
||||
# Part of saved performance info
|
||||
@dataclass
|
||||
class ParserInfo(dbtClassMixin):
|
||||
parser: str
|
||||
elapsed: float
|
||||
path_count: int = 0
|
||||
|
||||
|
||||
# TODO: this should be calculated per-file based on the vars() calls made in
|
||||
# parsing, so changing one var doesn't invalidate everything. also there should
|
||||
# be something like that for env_var - currently changing env_vars in way that
|
||||
# impact graph selection or configs will result in weird test failures.
|
||||
# finally, we should hash the actual profile used, not just root project +
|
||||
# profiles.yml + relevant args. While sufficient, it is definitely overkill.
|
||||
def make_parse_result(
|
||||
config: RuntimeConfig, all_projects: Mapping[str, Project]
|
||||
) -> ParseResult:
|
||||
"""Make a ParseResult from the project configuration and the profile."""
|
||||
# if any of these change, we need to reject the parser
|
||||
vars_hash = FileHash.from_contents(
|
||||
'\x00'.join([
|
||||
getattr(config.args, 'vars', '{}') or '{}',
|
||||
getattr(config.args, 'profile', '') or '',
|
||||
getattr(config.args, 'target', '') or '',
|
||||
__version__
|
||||
])
|
||||
)
|
||||
profile_path = os.path.join(config.args.profiles_dir, 'profiles.yml')
|
||||
with open(profile_path) as fp:
|
||||
profile_hash = FileHash.from_contents(fp.read())
|
||||
|
||||
project_hashes = {}
|
||||
for name, project in all_projects.items():
|
||||
path = os.path.join(project.project_root, 'dbt_project.yml')
|
||||
with open(path) as fp:
|
||||
project_hashes[name] = FileHash.from_contents(fp.read())
|
||||
|
||||
return ParseResult(
|
||||
vars_hash=vars_hash,
|
||||
profile_hash=profile_hash,
|
||||
project_hashes=project_hashes,
|
||||
)
|
||||
# Part of saved performance info
|
||||
@dataclass
|
||||
class ProjectLoaderInfo(dbtClassMixin):
|
||||
project_name: str
|
||||
elapsed: float
|
||||
parsers: List[ParserInfo]
|
||||
path_count: int = 0
|
||||
|
||||
|
||||
# Part of saved performance info
|
||||
@dataclass
|
||||
class ManifestLoaderInfo(dbtClassMixin, Writable):
|
||||
path_count: int = 0
|
||||
is_partial_parse_enabled: Optional[bool] = None
|
||||
read_files_elapsed: Optional[float] = None
|
||||
load_macros_elapsed: Optional[float] = None
|
||||
parse_project_elapsed: Optional[float] = None
|
||||
patch_sources_elapsed: Optional[float] = None
|
||||
process_manifest_elapsed: Optional[float] = None
|
||||
load_all_elapsed: Optional[float] = None
|
||||
projects: List[ProjectLoaderInfo] = field(default_factory=list)
|
||||
|
||||
|
||||
# The ManifestLoader loads the manifest. The standard way to use the
|
||||
# ManifestLoader is using the 'get_full_manifest' class method, but
|
||||
# many tests use abbreviated processes.
|
||||
class ManifestLoader:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -110,164 +107,269 @@ class ManifestLoader:
|
||||
) -> None:
|
||||
self.root_project: RuntimeConfig = root_project
|
||||
self.all_projects: Mapping[str, Project] = all_projects
|
||||
self.manifest: Manifest = Manifest({}, {}, {}, {}, {}, {}, [], {})
|
||||
self.manifest.metadata = root_project.get_metadata()
|
||||
# This is a MacroQueryStringSetter callable, which is called
|
||||
# later after we set the MacroManifest in the adapter. It sets
|
||||
# up the query headers.
|
||||
self.macro_hook: Callable[[Manifest], Any]
|
||||
if macro_hook is None:
|
||||
self.macro_hook = lambda m: None
|
||||
else:
|
||||
self.macro_hook = macro_hook
|
||||
|
||||
self.results: ParseResult = make_parse_result(
|
||||
root_project, all_projects,
|
||||
self._perf_info = ManifestLoaderInfo(
|
||||
is_partial_parse_enabled=self._partial_parse_enabled()
|
||||
)
|
||||
self._loaded_file_cache: Dict[str, FileBlock] = {}
|
||||
partial_parse = self._partial_parse_enabled()
|
||||
self._perf_info: Dict[str, Any] = {
|
||||
'path_count': 0, 'projects': [],
|
||||
'is_partial_parse_enabled': partial_parse}
|
||||
# State check determines whether the old_manifest and the current
|
||||
# manifest match well enough to do partial parsing
|
||||
self.manifest.state_check = self.build_manifest_state_check()
|
||||
# This is a saved manifest from a previous run that's used for partial parsing
|
||||
self.old_manifest: Optional[Manifest] = self.read_saved_manifest()
|
||||
|
||||
# This is the method that builds a complete manifest. We sometimes
|
||||
# use an abbreviated process in tests.
|
||||
@classmethod
|
||||
def get_full_manifest(
|
||||
cls,
|
||||
config: RuntimeConfig,
|
||||
*,
|
||||
reset: bool = False,
|
||||
) -> Manifest:
|
||||
|
||||
adapter = get_adapter(config) # type: ignore
|
||||
# reset is set in a TaskManager load_manifest call, since
|
||||
# the config and adapter may be persistent.
|
||||
if reset:
|
||||
config.clear_dependencies()
|
||||
adapter.clear_macro_manifest()
|
||||
macro_hook = adapter.connections.set_query_header
|
||||
|
||||
with PARSING_STATE: # set up logbook.Processor for parsing
|
||||
# Start performance counting
|
||||
start_load_all = time.perf_counter()
|
||||
|
||||
projects = config.load_dependencies()
|
||||
loader = ManifestLoader(config, projects, macro_hook)
|
||||
loader.load()
|
||||
|
||||
# The goal is to move partial parse writing to after update_manifest
|
||||
loader.write_manifest_for_partial_parse()
|
||||
manifest = loader.update_manifest()
|
||||
# Move write_manifest_for_partial_parse here
|
||||
|
||||
_check_manifest(manifest, config)
|
||||
manifest.build_flat_graph()
|
||||
|
||||
# This needs to happen after loading from a partial parse,
|
||||
# so that the adapter has the query headers from the macro_hook.
|
||||
loader.save_macros_to_adapter(adapter)
|
||||
|
||||
# Save performance info
|
||||
loader._perf_info.load_all_elapsed = (
|
||||
time.perf_counter() - start_load_all
|
||||
)
|
||||
loader.track_project_load()
|
||||
|
||||
return manifest
|
||||
|
||||
# This is where the main action happens
|
||||
def load(self):
|
||||
|
||||
if self.old_manifest is not None:
|
||||
logger.debug('Got an acceptable saved parse result')
|
||||
|
||||
# Read files creates a dictionary of projects to a dictionary
|
||||
# of parsers to lists of file strings. The file strings are
|
||||
# used to get the SourceFiles from the manifest files.
|
||||
# In the future the loaded files will be used to control
|
||||
# partial parsing, but right now we're just moving the
|
||||
# file loading out of the individual parsers and doing it
|
||||
# all at once.
|
||||
start_read_files = time.perf_counter()
|
||||
project_parser_files = {}
|
||||
for project in self.all_projects.values():
|
||||
read_files(project, self.manifest.files, project_parser_files)
|
||||
self._perf_info.read_files_elapsed = (time.perf_counter() - start_read_files)
|
||||
|
||||
# We need to parse the macros first, so they're resolvable when
|
||||
# the other files are loaded
|
||||
start_load_macros = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
parser = MacroParser(project, self.manifest)
|
||||
parser_files = project_parser_files[project.project_name]
|
||||
for search_key in parser_files['MacroParser']:
|
||||
block = FileBlock(self.manifest.files[search_key])
|
||||
self.parse_with_cache(block, parser)
|
||||
self.reparse_macros()
|
||||
# This is where a loop over self.manifest.macros should be performed
|
||||
# to set the 'depends_on' information from static rendering.
|
||||
self._perf_info.load_macros_elapsed = (time.perf_counter() - start_load_macros)
|
||||
|
||||
# Now that the macros are parsed, parse the rest of the files.
|
||||
# This is currently done on a per project basis,
|
||||
# but in the future we may change that
|
||||
start_parse_projects = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
self.parse_project(project, project_parser_files[project.project_name])
|
||||
self._perf_info.parse_project_elapsed = (time.perf_counter() - start_parse_projects)
|
||||
|
||||
# Parse every file in this project, except macros (already done)
|
||||
def parse_project(
|
||||
self,
|
||||
project: Project,
|
||||
parser_files
|
||||
) -> None:
|
||||
|
||||
project_parser_info: List[ParserInfo] = []
|
||||
start_timer = time.perf_counter()
|
||||
total_path_count = 0
|
||||
|
||||
# Loop through parsers with loaded files. Note: SchemaParser must be last
|
||||
parser_types: List[Type[Parser]] = [
|
||||
ModelParser, SnapshotParser, AnalysisParser, DataTestParser,
|
||||
SeedParser, DocumentationParser, SchemaParser]
|
||||
for parser_cls in parser_types:
|
||||
parser_name = parser_cls.__name__
|
||||
# No point in creating a parser if we don't have files for it
|
||||
if parser_name not in parser_files or not parser_files[parser_name]:
|
||||
continue
|
||||
|
||||
# Initialize timing info
|
||||
parser_path_count = 0
|
||||
parser_start_timer = time.perf_counter()
|
||||
|
||||
# Parse the project files for this parser
|
||||
parser: Parser = parser_cls(project, self.manifest, self.root_project)
|
||||
for search_key in parser_files[parser_name]:
|
||||
block = FileBlock(self.manifest.files[search_key])
|
||||
self.parse_with_cache(block, parser)
|
||||
parser_path_count = parser_path_count + 1
|
||||
|
||||
# Save timing info
|
||||
project_parser_info.append(ParserInfo(
|
||||
parser=parser.resource_type,
|
||||
path_count=parser_path_count,
|
||||
elapsed=time.perf_counter() - parser_start_timer
|
||||
))
|
||||
total_path_count = total_path_count + parser_path_count
|
||||
|
||||
# HookParser doesn't run from loaded files, just dbt_project.yml,
|
||||
# so do separately
|
||||
hook_parser = HookParser(project, self.manifest, self.root_project)
|
||||
path = hook_parser.get_path()
|
||||
file_block = FileBlock(load_source_file(path, ParseFileType.Hook, project.project_name))
|
||||
self.parse_with_cache(file_block, hook_parser)
|
||||
|
||||
# Store the performance info
|
||||
elapsed = time.perf_counter() - start_timer
|
||||
project_info = ProjectLoaderInfo(
|
||||
project_name=project.project_name,
|
||||
path_count=total_path_count,
|
||||
elapsed=elapsed,
|
||||
parsers=project_parser_info
|
||||
)
|
||||
self._perf_info.projects.append(project_info)
|
||||
self._perf_info.path_count = (
|
||||
self._perf_info.path_count + total_path_count
|
||||
)
|
||||
|
||||
# Loop through macros in the manifest and statically parse
|
||||
# the 'macro_sql' to find depends_on.macros
|
||||
def reparse_macros(self):
|
||||
internal_package_names = get_adapter_package_names(
|
||||
self.root_project.credentials.type
|
||||
)
|
||||
macro_resolver = MacroResolver(
|
||||
self.manifest.macros,
|
||||
self.root_project.project_name,
|
||||
internal_package_names
|
||||
)
|
||||
base_ctx = generate_base_context({})
|
||||
for macro in self.manifest.macros.values():
|
||||
possible_macro_calls = statically_extract_macro_calls(macro.macro_sql, base_ctx)
|
||||
for macro_name in possible_macro_calls:
|
||||
# adapter.dispatch calls can generate a call with the same name as the macro
|
||||
# it ought to be an adapter prefix (postgres_) or default_
|
||||
if macro_name == macro.name:
|
||||
continue
|
||||
dep_macro_id = macro_resolver.get_macro_id(macro.package_name, macro_name)
|
||||
if dep_macro_id:
|
||||
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
|
||||
|
||||
# This is where we use the partial-parse state from the
|
||||
# pickle file (if it exists)
|
||||
def parse_with_cache(
|
||||
self,
|
||||
path: FilePath,
|
||||
block: FileBlock,
|
||||
parser: BaseParser,
|
||||
old_results: Optional[ParseResult],
|
||||
) -> None:
|
||||
block = self._get_file(path, parser)
|
||||
if not self._get_cached(block, old_results, parser):
|
||||
# _get_cached actually copies the nodes, etc, that were
|
||||
# generated from the file to the results, in 'sanitized_update'
|
||||
if not self._get_cached(block, parser):
|
||||
parser.parse_file(block)
|
||||
|
||||
# check if we have a stored parse file, then check if
|
||||
# file checksums are the same or not and either return
|
||||
# the old ... stuff or return false (not cached)
|
||||
def _get_cached(
|
||||
self,
|
||||
block: FileBlock,
|
||||
old_results: Optional[ParseResult],
|
||||
parser: BaseParser,
|
||||
) -> bool:
|
||||
# TODO: handle multiple parsers w/ same files, by
|
||||
# tracking parser type vs node type? Or tracking actual
|
||||
# parser type during parsing?
|
||||
if old_results is None:
|
||||
if self.old_manifest is None:
|
||||
return False
|
||||
if old_results.has_file(block.file):
|
||||
return self.results.sanitized_update(
|
||||
block.file, old_results, parser.resource_type
|
||||
# The 'has_file' method is where we check to see if
|
||||
# the checksum of the old file is the same as the new
|
||||
# file. If the checksum is different, 'has_file' returns
|
||||
# false. If it's the same, the file and the things that
|
||||
# were generated from it are used.
|
||||
if self.old_manifest.has_file(block.file):
|
||||
return self.manifest.sanitized_update(
|
||||
block.file, self.old_manifest, parser.resource_type
|
||||
)
|
||||
return False
|
||||
|
||||
def _get_file(self, path: FilePath, parser: BaseParser) -> FileBlock:
|
||||
if path.search_key in self._loaded_file_cache:
|
||||
block = self._loaded_file_cache[path.search_key]
|
||||
else:
|
||||
block = FileBlock(file=parser.load_file(path))
|
||||
self._loaded_file_cache[path.search_key] = block
|
||||
return block
|
||||
|
||||
def parse_project(
|
||||
self,
|
||||
project: Project,
|
||||
macro_manifest: Manifest,
|
||||
old_results: Optional[ParseResult],
|
||||
) -> None:
|
||||
parsers: List[Parser] = []
|
||||
for cls in _parser_types:
|
||||
parser = cls(self.results, project, self.root_project,
|
||||
macro_manifest)
|
||||
parsers.append(parser)
|
||||
|
||||
# per-project cache.
|
||||
self._loaded_file_cache.clear()
|
||||
|
||||
project_info: Dict[str, Any] = {'parsers': []}
|
||||
start_timer = time.perf_counter()
|
||||
total_path_count = 0
|
||||
for parser in parsers:
|
||||
parser_path_count = 0
|
||||
parser_start_timer = time.perf_counter()
|
||||
for path in parser.search():
|
||||
self.parse_with_cache(path, parser, old_results)
|
||||
parser_path_count = parser_path_count + 1
|
||||
if parser_path_count % 100 == 0:
|
||||
print("..", end='', flush=True)
|
||||
|
||||
if parser_path_count > 0:
|
||||
parser_elapsed = time.perf_counter() - parser_start_timer
|
||||
project_info['parsers'].append({'parser': type(
|
||||
parser).__name__, 'path_count': parser_path_count,
|
||||
'elapsed': '{:.2f}'.format(parser_elapsed)})
|
||||
total_path_count = total_path_count + parser_path_count
|
||||
if total_path_count > 100:
|
||||
print("..")
|
||||
|
||||
elapsed = time.perf_counter() - start_timer
|
||||
project_info['project_name'] = project.project_name
|
||||
project_info['path_count'] = total_path_count
|
||||
project_info['elapsed'] = '{:.2f}'.format(elapsed)
|
||||
self._perf_info['projects'].append(project_info)
|
||||
self._perf_info['path_count'] = self._perf_info['path_count'] + \
|
||||
total_path_count
|
||||
|
||||
def load_only_macros(self) -> Manifest:
|
||||
old_results = self.read_parse_results()
|
||||
|
||||
for project in self.all_projects.values():
|
||||
parser = MacroParser(self.results, project)
|
||||
for path in parser.search():
|
||||
self.parse_with_cache(path, parser, old_results)
|
||||
|
||||
# make a manifest with just the macros to get the context
|
||||
macro_manifest = Manifest.from_macros(
|
||||
macros=self.results.macros,
|
||||
files=self.results.files
|
||||
)
|
||||
self.macro_hook(macro_manifest)
|
||||
return macro_manifest
|
||||
|
||||
def load(self, macro_manifest: Manifest):
|
||||
old_results = self.read_parse_results()
|
||||
if old_results is not None:
|
||||
logger.debug('Got an acceptable cached parse result')
|
||||
self.results.macros.update(macro_manifest.macros)
|
||||
self.results.files.update(macro_manifest.files)
|
||||
|
||||
start_timer = time.perf_counter()
|
||||
for project in self.all_projects.values():
|
||||
# parse a single project
|
||||
self.parse_project(project, macro_manifest, old_results)
|
||||
self._perf_info['parse_project_elapsed'] = '{:.2f}'.format(
|
||||
time.perf_counter() - start_timer)
|
||||
|
||||
def write_parse_results(self):
|
||||
def write_manifest_for_partial_parse(self):
|
||||
path = os.path.join(self.root_project.target_path,
|
||||
PARTIAL_PARSE_FILE_NAME)
|
||||
make_directory(self.root_project.target_path)
|
||||
with open(path, 'wb') as fp:
|
||||
pickle.dump(self.results, fp)
|
||||
pickle.dump(self.manifest, fp)
|
||||
|
||||
def matching_parse_results(self, result: ParseResult) -> bool:
|
||||
def matching_parse_results(self, manifest: Manifest) -> bool:
|
||||
"""Compare the global hashes of the read-in parse results' values to
|
||||
the known ones, and return if it is ok to re-use the results.
|
||||
"""
|
||||
try:
|
||||
if result.dbt_version != __version__:
|
||||
if manifest.metadata.dbt_version != __version__:
|
||||
logger.debug(
|
||||
'dbt version mismatch: {} != {}, cache invalidated'
|
||||
.format(result.dbt_version, __version__)
|
||||
.format(manifest.metadata.dbt_version, __version__)
|
||||
)
|
||||
return False
|
||||
except AttributeError:
|
||||
logger.debug('malformed result file, cache invalidated')
|
||||
except AttributeError as exc:
|
||||
logger.debug(f"malformed result file, cache invalidated: {exc}")
|
||||
return False
|
||||
|
||||
valid = True
|
||||
|
||||
if self.results.vars_hash != result.vars_hash:
|
||||
if not self.manifest.state_check or not manifest.state_check:
|
||||
return False
|
||||
|
||||
if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
|
||||
logger.debug('vars hash mismatch, cache invalidated')
|
||||
valid = False
|
||||
if self.results.profile_hash != result.profile_hash:
|
||||
if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
|
||||
logger.debug('profile hash mismatch, cache invalidated')
|
||||
valid = False
|
||||
|
||||
missing_keys = {
|
||||
k for k in self.results.project_hashes
|
||||
if k not in result.project_hashes
|
||||
k for k in self.manifest.state_check.project_hashes
|
||||
if k not in manifest.state_check.project_hashes
|
||||
}
|
||||
if missing_keys:
|
||||
logger.debug(
|
||||
@@ -276,9 +378,9 @@ class ManifestLoader:
|
||||
)
|
||||
valid = False
|
||||
|
||||
for key, new_value in self.results.project_hashes.items():
|
||||
if key in result.project_hashes:
|
||||
old_value = result.project_hashes[key]
|
||||
for key, new_value in self.manifest.state_check.project_hashes.items():
|
||||
if key in manifest.state_check.project_hashes:
|
||||
old_value = manifest.state_check.project_hashes[key]
|
||||
if new_value != old_value:
|
||||
logger.debug(
|
||||
'For key {}, hash mismatch ({} -> {}), cache '
|
||||
@@ -298,7 +400,7 @@ class ManifestLoader:
|
||||
else:
|
||||
return DEFAULT_PARTIAL_PARSE
|
||||
|
||||
def read_parse_results(self) -> Optional[ParseResult]:
|
||||
def read_saved_manifest(self) -> Optional[Manifest]:
|
||||
if not self._partial_parse_enabled():
|
||||
logger.debug('Partial parsing not enabled')
|
||||
return None
|
||||
@@ -308,82 +410,130 @@ class ManifestLoader:
|
||||
if os.path.exists(path):
|
||||
try:
|
||||
with open(path, 'rb') as fp:
|
||||
result: ParseResult = pickle.load(fp)
|
||||
manifest: Manifest = pickle.load(fp)
|
||||
# keep this check inside the try/except in case something about
|
||||
# the file has changed in weird ways, perhaps due to being a
|
||||
# different version of dbt
|
||||
if self.matching_parse_results(result):
|
||||
return result
|
||||
if self.matching_parse_results(manifest):
|
||||
return manifest
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
'Failed to load parsed file from disk at {}: {}'
|
||||
.format(path, exc),
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
def process_manifest(self, manifest: Manifest):
|
||||
# This find the sources, refs, and docs and resolves them
|
||||
# for nodes and exposures
|
||||
def process_manifest(self):
|
||||
project_name = self.root_project.project_name
|
||||
process_sources(manifest, project_name)
|
||||
process_refs(manifest, project_name)
|
||||
process_docs(manifest, self.root_project)
|
||||
process_sources(self.manifest, project_name)
|
||||
process_refs(self.manifest, project_name)
|
||||
process_docs(self.manifest, self.root_project)
|
||||
|
||||
def create_manifest(self) -> Manifest:
|
||||
# before we do anything else, patch the sources. This mutates
|
||||
# results.disabled, so it needs to come before the final 'disabled'
|
||||
# list is created
|
||||
def update_manifest(self) -> Manifest:
|
||||
start_patch = time.perf_counter()
|
||||
sources = patch_sources(self.results, self.root_project)
|
||||
self._perf_info['patch_sources_elapsed'] = '{:.2f}'.format(
|
||||
time.perf_counter() - start_patch)
|
||||
# patch_sources converts the UnparsedSourceDefinitions in the
|
||||
# Manifest.sources to ParsedSourceDefinition via 'patch_source'
|
||||
# in SourcePatcher
|
||||
sources = patch_sources(self.root_project, self.manifest)
|
||||
self.manifest.sources = sources
|
||||
# ParseResults had a 'disabled' attribute which was a dictionary
|
||||
# which is now named '_disabled'. This used to copy from
|
||||
# ParseResults to the Manifest. Can this be normalized so
|
||||
# there's only one disabled?
|
||||
disabled = []
|
||||
for value in self.results.disabled.values():
|
||||
for value in self.manifest._disabled.values():
|
||||
disabled.extend(value)
|
||||
|
||||
nodes: MutableMapping[str, ManifestNode] = {
|
||||
k: v for k, v in self.results.nodes.items()
|
||||
}
|
||||
|
||||
manifest = Manifest(
|
||||
nodes=nodes,
|
||||
sources=sources,
|
||||
macros=self.results.macros,
|
||||
docs=self.results.docs,
|
||||
exposures=self.results.exposures,
|
||||
metadata=self.root_project.get_metadata(),
|
||||
disabled=disabled,
|
||||
files=self.results.files,
|
||||
selectors=self.root_project.manifest_selectors,
|
||||
self.manifest.disabled = disabled
|
||||
self._perf_info.patch_sources_elapsed = (
|
||||
time.perf_counter() - start_patch
|
||||
)
|
||||
manifest.patch_nodes(self.results.patches)
|
||||
manifest.patch_macros(self.results.macro_patches)
|
||||
|
||||
self.manifest.selectors = self.root_project.manifest_selectors
|
||||
|
||||
# do the node and macro patches
|
||||
self.manifest.patch_nodes()
|
||||
self.manifest.patch_macros()
|
||||
|
||||
# process_manifest updates the refs, sources, and docs
|
||||
start_process = time.perf_counter()
|
||||
self.process_manifest(manifest)
|
||||
self._perf_info['process_manifest_elapsed'] = '{:.2f}'.format(
|
||||
time.perf_counter() - start_process)
|
||||
return manifest
|
||||
self.process_manifest()
|
||||
|
||||
@classmethod
|
||||
def load_all(
|
||||
cls,
|
||||
root_config: RuntimeConfig,
|
||||
macro_manifest: Manifest,
|
||||
macro_hook: Callable[[Manifest], Any],
|
||||
) -> Manifest:
|
||||
with PARSING_STATE:
|
||||
start_load_all = time.perf_counter()
|
||||
projects = root_config.load_dependencies()
|
||||
loader = cls(root_config, projects, macro_hook)
|
||||
loader.load(macro_manifest=macro_manifest)
|
||||
loader.write_parse_results()
|
||||
manifest = loader.create_manifest()
|
||||
_check_manifest(manifest, root_config)
|
||||
manifest.build_flat_graph()
|
||||
loader._perf_info['load_all_elapsed'] = '{:.2f}'.format(
|
||||
time.perf_counter() - start_load_all)
|
||||
return manifest
|
||||
self._perf_info.process_manifest_elapsed = (
|
||||
time.perf_counter() - start_process
|
||||
)
|
||||
|
||||
return self.manifest
|
||||
|
||||
# TODO: this should be calculated per-file based on the vars() calls made in
|
||||
# parsing, so changing one var doesn't invalidate everything. also there should
|
||||
# be something like that for env_var - currently changing env_vars in way that
|
||||
# impact graph selection or configs will result in weird test failures.
|
||||
# finally, we should hash the actual profile used, not just root project +
|
||||
# profiles.yml + relevant args. While sufficient, it is definitely overkill.
|
||||
def build_manifest_state_check(self):
|
||||
config = self.root_project
|
||||
all_projects = self.all_projects
|
||||
# if any of these change, we need to reject the parser
|
||||
vars_hash = FileHash.from_contents(
|
||||
'\x00'.join([
|
||||
getattr(config.args, 'vars', '{}') or '{}',
|
||||
getattr(config.args, 'profile', '') or '',
|
||||
getattr(config.args, 'target', '') or '',
|
||||
__version__
|
||||
])
|
||||
)
|
||||
|
||||
profile_path = os.path.join(config.args.profiles_dir, 'profiles.yml')
|
||||
with open(profile_path) as fp:
|
||||
profile_hash = FileHash.from_contents(fp.read())
|
||||
|
||||
project_hashes = {}
|
||||
for name, project in all_projects.items():
|
||||
path = os.path.join(project.project_root, 'dbt_project.yml')
|
||||
with open(path) as fp:
|
||||
project_hashes[name] = FileHash.from_contents(fp.read())
|
||||
|
||||
state_check = ManifestStateCheck(
|
||||
vars_hash=vars_hash,
|
||||
profile_hash=profile_hash,
|
||||
project_hashes=project_hashes,
|
||||
)
|
||||
return state_check
|
||||
|
||||
def save_macros_to_adapter(self, adapter):
|
||||
macro_manifest = MacroManifest(self.manifest.macros)
|
||||
adapter._macro_manifest_lazy = macro_manifest
|
||||
# This executes the callable macro_hook and sets the
|
||||
# query headers
|
||||
self.macro_hook(macro_manifest)
|
||||
|
||||
# This creates a MacroManifest which contains the macros in
|
||||
# the adapter. Only called by the load_macros call from the
|
||||
# adapter.
|
||||
def create_macro_manifest(self):
|
||||
for project in self.all_projects.values():
|
||||
# what is the manifest passed in actually used for?
|
||||
macro_parser = MacroParser(project, self.manifest)
|
||||
for path in macro_parser.get_paths():
|
||||
source_file = load_source_file(
|
||||
path, ParseFileType.Macro, project.project_name)
|
||||
block = FileBlock(source_file)
|
||||
# This does not add the file to the manifest.files,
|
||||
# but that shouldn't be necessary here.
|
||||
self.parse_with_cache(block, macro_parser)
|
||||
macro_manifest = MacroManifest(self.manifest.macros)
|
||||
return macro_manifest
|
||||
|
||||
# This is called by the adapter code only, to create the
|
||||
# MacroManifest that's stored in the adapter.
|
||||
# 'get_full_manifest' uses a persistent ManifestLoader while this
|
||||
# creates a temporary ManifestLoader and throws it away.
|
||||
# Not sure when this would actually get used except in tests.
|
||||
# The ManifestLoader loads macros with other files, then copies
|
||||
# into the adapter MacroManifest.
|
||||
@classmethod
|
||||
def load_macros(
|
||||
cls,
|
||||
@@ -392,8 +542,33 @@ class ManifestLoader:
|
||||
) -> Manifest:
|
||||
with PARSING_STATE:
|
||||
projects = root_config.load_dependencies()
|
||||
# This creates a loader object, including result,
|
||||
# and then throws it away, returning only the
|
||||
# manifest
|
||||
loader = cls(root_config, projects, macro_hook)
|
||||
return loader.load_only_macros()
|
||||
macro_manifest = loader.create_macro_manifest()
|
||||
|
||||
return macro_manifest
|
||||
|
||||
# Create tracking event for saving performance info
|
||||
def track_project_load(self):
|
||||
invocation_id = dbt.tracking.active_user.invocation_id
|
||||
dbt.tracking.track_project_load({
|
||||
"invocation_id": invocation_id,
|
||||
"project_id": self.root_project.hashed_name(),
|
||||
"path_count": self._perf_info.path_count,
|
||||
"read_files_elapsed": self._perf_info.read_files_elapsed,
|
||||
"load_macros_elapsed": self._perf_info.load_macros_elapsed,
|
||||
"parse_project_elapsed": self._perf_info.parse_project_elapsed,
|
||||
"patch_sources_elapsed": self._perf_info.patch_sources_elapsed,
|
||||
"process_manifest_elapsed": (
|
||||
self._perf_info.process_manifest_elapsed
|
||||
),
|
||||
"load_all_elapsed": self._perf_info.load_all_elapsed,
|
||||
"is_partial_parse_enabled": (
|
||||
self._perf_info.is_partial_parse_enabled
|
||||
),
|
||||
})
|
||||
|
||||
|
||||
def invalid_ref_fail_unless_test(node, target_model_name,
|
||||
@@ -490,6 +665,7 @@ def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
|
||||
_warn_for_unused_resource_config_paths(manifest, config)
|
||||
|
||||
|
||||
# This is just used in test cases
|
||||
def _load_projects(config, paths):
|
||||
for path in paths:
|
||||
try:
|
||||
@@ -523,6 +699,7 @@ DocsContextCallback = Callable[
|
||||
]
|
||||
|
||||
|
||||
# node and column descriptions
|
||||
def _process_docs_for_node(
|
||||
context: Dict[str, Any],
|
||||
node: ManifestNode,
|
||||
@@ -532,6 +709,7 @@ def _process_docs_for_node(
|
||||
column.description = get_rendered(column.description, context)
|
||||
|
||||
|
||||
# source and table descriptions, column descriptions
|
||||
def _process_docs_for_source(
|
||||
context: Dict[str, Any],
|
||||
source: ParsedSourceDefinition,
|
||||
@@ -549,6 +727,7 @@ def _process_docs_for_source(
|
||||
column.description = column_desc
|
||||
|
||||
|
||||
# macro argument descriptions
|
||||
def _process_docs_for_macro(
|
||||
context: Dict[str, Any], macro: ParsedMacro
|
||||
) -> None:
|
||||
@@ -557,6 +736,17 @@ def _process_docs_for_macro(
|
||||
arg.description = get_rendered(arg.description, context)
|
||||
|
||||
|
||||
# exposure descriptions
|
||||
def _process_docs_for_exposure(
|
||||
context: Dict[str, Any], exposure: ParsedExposure
|
||||
) -> None:
|
||||
exposure.description = get_rendered(exposure.description, context)
|
||||
|
||||
|
||||
# nodes: node and column descriptions
|
||||
# sources: source and table descriptions, column descriptions
|
||||
# macros: macro argument descriptions
|
||||
# exposures: exposure descriptions
|
||||
def process_docs(manifest: Manifest, config: RuntimeConfig):
|
||||
for node in manifest.nodes.values():
|
||||
ctx = generate_runtime_docs(
|
||||
@@ -582,6 +772,14 @@ def process_docs(manifest: Manifest, config: RuntimeConfig):
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_macro(ctx, macro)
|
||||
for exposure in manifest.exposures.values():
|
||||
ctx = generate_runtime_docs(
|
||||
config,
|
||||
exposure,
|
||||
manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_exposure(ctx, exposure)
|
||||
|
||||
|
||||
def _process_refs_for_exposure(
|
||||
@@ -667,9 +865,12 @@ def _process_refs_for_node(
|
||||
# TODO: I think this is extraneous, node should already be the same
|
||||
# as manifest.nodes[node.unique_id] (we're mutating node here, not
|
||||
# making a new one)
|
||||
# Q: could we stop doing this?
|
||||
manifest.update_node(node)
|
||||
|
||||
|
||||
# Takes references in 'refs' array of nodes and exposures, finds the target
|
||||
# node, and updates 'depends_on.nodes' with the unique id
|
||||
def process_refs(manifest: Manifest, current_project: str):
|
||||
for node in manifest.nodes.values():
|
||||
_process_refs_for_node(manifest, current_project, node)
|
||||
@@ -729,6 +930,9 @@ def _process_sources_for_node(
|
||||
manifest.update_node(node)
|
||||
|
||||
|
||||
# Loops through all nodes and exposures, for each element in
|
||||
# 'sources' array finds the source node and updates the
|
||||
# 'depends_on.nodes' array with the unique id
|
||||
def process_sources(manifest: Manifest, current_project: str):
|
||||
for node in manifest.nodes.values():
|
||||
if node.resource_type == NodeType.Source:
|
||||
@@ -740,6 +944,8 @@ def process_sources(manifest: Manifest, current_project: str):
|
||||
return manifest
|
||||
|
||||
|
||||
# This is called in task.rpc.sql_commands when a "dynamic" node is
|
||||
# created in the manifest, in 'add_refs'
|
||||
def process_macro(
|
||||
config: RuntimeConfig, manifest: Manifest, macro: ParsedMacro
|
||||
) -> None:
|
||||
@@ -752,6 +958,8 @@ def process_macro(
|
||||
_process_docs_for_macro(ctx, macro)
|
||||
|
||||
|
||||
# This is called in task.rpc.sql_commands when a "dynamic" node is
|
||||
# created in the manifest, in 'add_refs'
|
||||
def process_node(
|
||||
config: RuntimeConfig, manifest: Manifest, node: ManifestNode
|
||||
):
|
||||
@@ -762,18 +970,3 @@ def process_node(
|
||||
_process_refs_for_node(manifest, config.project_name, node)
|
||||
ctx = generate_runtime_docs(config, node, manifest, config.project_name)
|
||||
_process_docs_for_node(ctx, node)
|
||||
|
||||
|
||||
def load_macro_manifest(
|
||||
config: RuntimeConfig,
|
||||
macro_hook: Callable[[Manifest], Any],
|
||||
) -> Manifest:
|
||||
return ManifestLoader.load_macros(config, macro_hook)
|
||||
|
||||
|
||||
def load_manifest(
|
||||
config: RuntimeConfig,
|
||||
macro_manifest: Manifest,
|
||||
macro_hook: Callable[[Manifest], Any],
|
||||
) -> Manifest:
|
||||
return ManifestLoader.load_all(config, macro_manifest, macro_hook)
|
||||
|
||||
@@ -1,17 +1,14 @@
from dbt.contracts.graph.parsed import ParsedModelNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock

class ModelParser(SimpleSQLParser[ParsedModelNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.source_paths, '.sql'
)

def parse_from_dict(self, dct, validate=True) -> ParsedModelNode:
return ParsedModelNode.from_dict(dct, validate=validate)
if validate:
ParsedModelNode.validate(dct)
return ParsedModelNode.from_dict(dct)

@property
def resource_type(self) -> NodeType:
core/dbt/parser/read_files.py (new file, 111 lines)
@@ -0,0 +1,111 @@
|
||||
from dbt.clients.system import load_file_contents
|
||||
from dbt.contracts.files import FilePath, ParseFileType, SourceFile, FileHash
|
||||
|
||||
from dbt.parser.search import FilesystemSearcher
|
||||
|
||||
|
||||
# This loads the file's contents and creates the SourceFile object
|
||||
def load_source_file(
|
||||
path: FilePath, parse_file_type: ParseFileType,
|
||||
project_name: str) -> SourceFile:
|
||||
file_contents = load_file_contents(path.absolute_path, strip=False)
|
||||
checksum = FileHash.from_contents(file_contents)
|
||||
source_file = SourceFile(path=path, checksum=checksum,
|
||||
parse_file_type=parse_file_type, project_name=project_name)
|
||||
source_file.contents = file_contents.strip()
|
||||
return source_file
|
||||
|
||||
|
||||
# Special processing for big seed files
|
||||
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
|
||||
if match.seed_too_large():
|
||||
# We don't want to calculate a hash of this file. Use the path.
|
||||
source_file = SourceFile.big_seed(match)
|
||||
else:
|
||||
file_contents = load_file_contents(match.absolute_path, strip=False)
|
||||
checksum = FileHash.from_contents(file_contents)
|
||||
source_file = SourceFile(path=match, checksum=checksum)
|
||||
source_file.contents = ''
|
||||
source_file.parse_file_type = ParseFileType.Seed
|
||||
source_file.project_name = project_name
|
||||
return source_file
|
||||
|
||||
|
||||
# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
# them into a bunch of SourceFile objects
|
||||
def get_source_files(project, paths, extension, parse_file_type):
|
||||
# file path list
|
||||
fp_list = list(FilesystemSearcher(
|
||||
project, paths, extension
|
||||
))
|
||||
# file block list
|
||||
fb_list = []
|
||||
for fp in fp_list:
|
||||
if parse_file_type == ParseFileType.Seed:
|
||||
fb_list.append(load_seed_source_file(fp, project.project_name))
|
||||
else:
|
||||
fb_list.append(load_source_file(
|
||||
fp, parse_file_type, project.project_name))
|
||||
return fb_list
|
||||
|
||||
|
||||
def read_files_for_parser(project, files, dirs, extension, parse_ft):
|
||||
parser_files = []
|
||||
source_files = get_source_files(
|
||||
project, dirs, extension, parse_ft
|
||||
)
|
||||
for sf in source_files:
|
||||
files[sf.search_key] = sf
|
||||
parser_files.append(sf.search_key)
|
||||
return parser_files
|
||||
|
||||
|
||||
# This needs to read files for multiple projects, so the 'files'
|
||||
# dictionary needs to be passed in. What determines the order of
|
||||
# the various projects? Is the root project always last? Do the
|
||||
# non-root projects need to be done separately in order?
|
||||
def read_files(project, files, parser_files):
|
||||
|
||||
project_files = {}
|
||||
|
||||
project_files['MacroParser'] = read_files_for_parser(
|
||||
project, files, project.macro_paths, '.sql', ParseFileType.Macro,
|
||||
)
|
||||
|
||||
project_files['ModelParser'] = read_files_for_parser(
|
||||
project, files, project.source_paths, '.sql', ParseFileType.Model,
|
||||
)
|
||||
|
||||
project_files['SnapshotParser'] = read_files_for_parser(
|
||||
project, files, project.snapshot_paths, '.sql', ParseFileType.Snapshot,
|
||||
)
|
||||
|
||||
project_files['AnalysisParser'] = read_files_for_parser(
|
||||
project, files, project.analysis_paths, '.sql', ParseFileType.Analysis,
|
||||
)
|
||||
|
||||
project_files['DataTestParser'] = read_files_for_parser(
|
||||
project, files, project.test_paths, '.sql', ParseFileType.Test,
|
||||
)
|
||||
|
||||
project_files['SeedParser'] = read_files_for_parser(
|
||||
project, files, project.data_paths, '.csv', ParseFileType.Seed,
|
||||
)
|
||||
|
||||
project_files['DocumentationParser'] = read_files_for_parser(
|
||||
project, files, project.docs_paths, '.md', ParseFileType.Documentation,
|
||||
)
|
||||
|
||||
project_files['SchemaParser'] = read_files_for_parser(
|
||||
project, files, project.all_source_paths, '.yml', ParseFileType.Schema,
|
||||
)
|
||||
|
||||
# Also read .yaml files for schema files. It might be better to change
# 'read_files_for_parser' to accept a list of extensions in the future.
|
||||
yaml_files = read_files_for_parser(
|
||||
project, files, project.all_source_paths, '.yaml', ParseFileType.Schema,
|
||||
)
|
||||
project_files['SchemaParser'].extend(yaml_files)
|
||||
|
||||
# Store the parser files for this particular project
|
||||
parser_files[project.project_name] = project_files
|
||||
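As a rough picture of what read_files() above produces (project name and paths invented), the shared 'files' dict maps each SourceFile's search key to the SourceFile object, and 'parser_files' records, per project and per parser, which keys that parser should handle:

```python
# Hypothetical result shapes after read_files() for a project named
# 'jaffle_shop'. Search keys are shown as relative paths for readability;
# the real key comes from SourceFile.search_key.
files = {
    'models/customers.sql': '<SourceFile for models/customers.sql>',
    'models/schema.yml': '<SourceFile for models/schema.yml>',
    'data/country_codes.csv': '<SourceFile for data/country_codes.csv>',
}

parser_files = {
    'jaffle_shop': {
        'ModelParser': ['models/customers.sql'],
        'SchemaParser': ['models/schema.yml'],
        'SeedParser': ['data/country_codes.csv'],
        # ...one list per parser populated in read_files()
    },
}
```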
@@ -1,328 +0,0 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TypeVar, MutableMapping, Mapping, Union, List
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
|
||||
from dbt.contracts.files import RemoteFile, FileHash, SourceFile
|
||||
from dbt.contracts.graph.compiled import CompileResultNode
|
||||
from dbt.contracts.graph.parsed import (
|
||||
HasUniqueID,
|
||||
ParsedAnalysisNode,
|
||||
ParsedDataTestNode,
|
||||
ParsedDocumentation,
|
||||
ParsedHookNode,
|
||||
ParsedMacro,
|
||||
ParsedMacroPatch,
|
||||
ParsedModelNode,
|
||||
ParsedNodePatch,
|
||||
ParsedExposure,
|
||||
ParsedRPCNode,
|
||||
ParsedSeedNode,
|
||||
ParsedSchemaTestNode,
|
||||
ParsedSnapshotNode,
|
||||
UnpatchedSourceDefinition,
|
||||
)
|
||||
from dbt.contracts.graph.unparsed import SourcePatch
|
||||
from dbt.contracts.util import Writable, Replaceable, MacroKey, SourceKey
|
||||
from dbt.exceptions import (
|
||||
raise_duplicate_resource_name, raise_duplicate_patch_name,
|
||||
raise_duplicate_macro_patch_name, CompilationException, InternalException,
|
||||
raise_compiler_error, raise_duplicate_source_patch_name
|
||||
)
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.ui import line_wrap_message
|
||||
from dbt.version import __version__
|
||||
|
||||
|
||||
# Parsers can return anything as long as it's a unique ID
|
||||
ParsedValueType = TypeVar('ParsedValueType', bound=HasUniqueID)
|
||||
|
||||
|
||||
def _check_duplicates(
|
||||
value: HasUniqueID, src: Mapping[str, HasUniqueID]
|
||||
):
|
||||
if value.unique_id in src:
|
||||
raise_duplicate_resource_name(value, src[value.unique_id])
|
||||
|
||||
|
||||
ManifestNodes = Union[
|
||||
ParsedAnalysisNode,
|
||||
ParsedDataTestNode,
|
||||
ParsedHookNode,
|
||||
ParsedModelNode,
|
||||
ParsedRPCNode,
|
||||
ParsedSchemaTestNode,
|
||||
ParsedSeedNode,
|
||||
ParsedSnapshotNode,
|
||||
]
|
||||
|
||||
|
||||
def dict_field():
|
||||
return field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParseResult(JsonSchemaMixin, Writable, Replaceable):
|
||||
vars_hash: FileHash
|
||||
profile_hash: FileHash
|
||||
project_hashes: MutableMapping[str, FileHash]
|
||||
nodes: MutableMapping[str, ManifestNodes] = dict_field()
|
||||
sources: MutableMapping[str, UnpatchedSourceDefinition] = dict_field()
|
||||
docs: MutableMapping[str, ParsedDocumentation] = dict_field()
|
||||
macros: MutableMapping[str, ParsedMacro] = dict_field()
|
||||
exposures: MutableMapping[str, ParsedExposure] = dict_field()
|
||||
macro_patches: MutableMapping[MacroKey, ParsedMacroPatch] = dict_field()
|
||||
patches: MutableMapping[str, ParsedNodePatch] = dict_field()
|
||||
source_patches: MutableMapping[SourceKey, SourcePatch] = dict_field()
|
||||
files: MutableMapping[str, SourceFile] = dict_field()
|
||||
disabled: MutableMapping[str, List[CompileResultNode]] = dict_field()
|
||||
dbt_version: str = __version__
|
||||
|
||||
def get_file(self, source_file: SourceFile) -> SourceFile:
|
||||
key = source_file.search_key
|
||||
if key is None:
|
||||
return source_file
|
||||
if key not in self.files:
|
||||
self.files[key] = source_file
|
||||
return self.files[key]
|
||||
|
||||
def add_source(
|
||||
self, source_file: SourceFile, source: UnpatchedSourceDefinition
|
||||
):
|
||||
# sources can't be overwritten!
|
||||
_check_duplicates(source, self.sources)
|
||||
self.sources[source.unique_id] = source
|
||||
self.get_file(source_file).sources.append(source.unique_id)
|
||||
|
||||
def add_node_nofile(self, node: ManifestNodes):
|
||||
# nodes can't be overwritten!
|
||||
_check_duplicates(node, self.nodes)
|
||||
self.nodes[node.unique_id] = node
|
||||
|
||||
def add_node(self, source_file: SourceFile, node: ManifestNodes):
|
||||
self.add_node_nofile(node)
|
||||
self.get_file(source_file).nodes.append(node.unique_id)
|
||||
|
||||
def add_exposure(self, source_file: SourceFile, exposure: ParsedExposure):
|
||||
_check_duplicates(exposure, self.exposures)
|
||||
self.exposures[exposure.unique_id] = exposure
|
||||
self.get_file(source_file).exposures.append(exposure.unique_id)
|
||||
|
||||
def add_disabled_nofile(self, node: CompileResultNode):
|
||||
if node.unique_id in self.disabled:
|
||||
self.disabled[node.unique_id].append(node)
|
||||
else:
|
||||
self.disabled[node.unique_id] = [node]
|
||||
|
||||
def add_disabled(self, source_file: SourceFile, node: CompileResultNode):
|
||||
self.add_disabled_nofile(node)
|
||||
self.get_file(source_file).nodes.append(node.unique_id)
|
||||
|
||||
def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
|
||||
if macro.unique_id in self.macros:
|
||||
# detect that the macro exists and emit an error
|
||||
other_path = self.macros[macro.unique_id].original_file_path
|
||||
# subtract 2 for the "Compilation Error" indent
|
||||
# note that the line wrap eats newlines, so if you want newlines,
|
||||
# this is the result :(
|
||||
msg = line_wrap_message(
|
||||
f'''\
|
||||
dbt found two macros named "{macro.name}" in the project
|
||||
"{macro.package_name}".
|
||||
|
||||
|
||||
To fix this error, rename or remove one of the following
|
||||
macros:
|
||||
|
||||
- {macro.original_file_path}
|
||||
|
||||
- {other_path}
|
||||
''',
|
||||
subtract=2
|
||||
)
|
||||
raise_compiler_error(msg)
|
||||
|
||||
self.macros[macro.unique_id] = macro
|
||||
self.get_file(source_file).macros.append(macro.unique_id)
|
||||
|
||||
def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
|
||||
_check_duplicates(doc, self.docs)
|
||||
self.docs[doc.unique_id] = doc
|
||||
self.get_file(source_file).docs.append(doc.unique_id)
|
||||
|
||||
def add_patch(
|
||||
self, source_file: SourceFile, patch: ParsedNodePatch
|
||||
) -> None:
|
||||
# patches can't be overwritten
|
||||
if patch.name in self.patches:
|
||||
raise_duplicate_patch_name(patch, self.patches[patch.name])
|
||||
self.patches[patch.name] = patch
|
||||
self.get_file(source_file).patches.append(patch.name)
|
||||
|
||||
def add_macro_patch(
|
||||
self, source_file: SourceFile, patch: ParsedMacroPatch
|
||||
) -> None:
|
||||
# macros are fully namespaced
|
||||
key = (patch.package_name, patch.name)
|
||||
if key in self.macro_patches:
|
||||
raise_duplicate_macro_patch_name(patch, self.macro_patches[key])
|
||||
self.macro_patches[key] = patch
|
||||
self.get_file(source_file).macro_patches.append(key)
|
||||
|
||||
def add_source_patch(
|
||||
self, source_file: SourceFile, patch: SourcePatch
|
||||
) -> None:
|
||||
# source patches must be unique
|
||||
key = (patch.overrides, patch.name)
|
||||
if key in self.source_patches:
|
||||
raise_duplicate_source_patch_name(patch, self.source_patches[key])
|
||||
self.source_patches[key] = patch
|
||||
self.get_file(source_file).source_patches.append(key)
|
||||
|
||||
def _get_disabled(
|
||||
self,
|
||||
unique_id: str,
|
||||
match_file: SourceFile,
|
||||
) -> List[CompileResultNode]:
|
||||
if unique_id not in self.disabled:
|
||||
raise InternalException(
|
||||
'called _get_disabled with id={}, but it does not exist'
|
||||
.format(unique_id)
|
||||
)
|
||||
return [
|
||||
n for n in self.disabled[unique_id]
|
||||
if n.original_file_path == match_file.path.original_file_path
|
||||
]
|
||||
|
||||
def _process_node(
|
||||
self,
|
||||
node_id: str,
|
||||
source_file: SourceFile,
|
||||
old_file: SourceFile,
|
||||
old_result: 'ParseResult',
|
||||
) -> None:
|
||||
"""Nodes are a special kind of complicated - there can be multiple
|
||||
with the same name, as long as all but one are disabled.
|
||||
|
||||
Only handle nodes where the matching node has the same resource type
|
||||
as the current parser.
|
||||
"""
|
||||
source_path = source_file.path.original_file_path
|
||||
found: bool = False
|
||||
if node_id in old_result.nodes:
|
||||
old_node = old_result.nodes[node_id]
|
||||
if old_node.original_file_path == source_path:
|
||||
self.add_node(source_file, old_node)
|
||||
found = True
|
||||
|
||||
if node_id in old_result.disabled:
|
||||
matches = old_result._get_disabled(node_id, source_file)
|
||||
for match in matches:
|
||||
self.add_disabled(source_file, match)
|
||||
found = True
|
||||
|
||||
if not found:
|
||||
raise CompilationException(
|
||||
'Expected to find "{}" in cached "manifest.nodes" or '
|
||||
'"manifest.disabled" based on cached file information: {}!'
|
||||
.format(node_id, old_file)
|
||||
)
|
||||
|
||||
def sanitized_update(
|
||||
self,
|
||||
source_file: SourceFile,
|
||||
old_result: 'ParseResult',
|
||||
resource_type: NodeType,
|
||||
) -> bool:
|
||||
"""Perform a santized update. If the file can't be updated, invalidate
|
||||
it and return false.
|
||||
"""
|
||||
if isinstance(source_file.path, RemoteFile):
|
||||
return False
|
||||
|
||||
old_file = old_result.get_file(source_file)
|
||||
for doc_id in old_file.docs:
|
||||
doc = _expect_value(doc_id, old_result.docs, old_file, "docs")
|
||||
self.add_doc(source_file, doc)
|
||||
|
||||
for macro_id in old_file.macros:
|
||||
macro = _expect_value(
|
||||
macro_id, old_result.macros, old_file, "macros"
|
||||
)
|
||||
self.add_macro(source_file, macro)
|
||||
|
||||
for source_id in old_file.sources:
|
||||
source = _expect_value(
|
||||
source_id, old_result.sources, old_file, "sources"
|
||||
)
|
||||
self.add_source(source_file, source)
|
||||
|
||||
# because we know this is how we _parsed_ the node, we can safely
|
||||
# assume if it's disabled it was done by the project or file, and
|
||||
# we can keep our old data
|
||||
# the node ID could be in old_result.disabled AND in old_result.nodes.
|
||||
# In that case, we have to make sure the path also matches.
|
||||
for node_id in old_file.nodes:
|
||||
# cheat: look at the first part of the node ID and compare it to
|
||||
# the parser resource type. On a mismatch, bail out.
|
||||
if resource_type != node_id.split('.')[0]:
|
||||
continue
|
||||
self._process_node(node_id, source_file, old_file, old_result)
|
||||
|
||||
for exposure_id in old_file.exposures:
|
||||
exposure = _expect_value(
|
||||
exposure_id, old_result.exposures, old_file, "exposures"
|
||||
)
|
||||
self.add_exposure(source_file, exposure)
|
||||
|
||||
patched = False
|
||||
for name in old_file.patches:
|
||||
patch = _expect_value(
|
||||
name, old_result.patches, old_file, "patches"
|
||||
)
|
||||
self.add_patch(source_file, patch)
|
||||
patched = True
|
||||
if patched:
|
||||
self.get_file(source_file).patches.sort()
|
||||
|
||||
macro_patched = False
|
||||
for key in old_file.macro_patches:
|
||||
macro_patch = _expect_value(
|
||||
key, old_result.macro_patches, old_file, "macro_patches"
|
||||
)
|
||||
self.add_macro_patch(source_file, macro_patch)
|
||||
macro_patched = True
|
||||
if macro_patched:
|
||||
self.get_file(source_file).macro_patches.sort()
|
||||
|
||||
return True
|
||||
|
||||
def has_file(self, source_file: SourceFile) -> bool:
|
||||
key = source_file.search_key
|
||||
if key is None:
|
||||
return False
|
||||
if key not in self.files:
|
||||
return False
|
||||
my_checksum = self.files[key].checksum
|
||||
return my_checksum == source_file.checksum
|
||||
|
||||
@classmethod
|
||||
def rpc(cls):
|
||||
# ugh!
|
||||
return cls(FileHash.empty(), FileHash.empty(), {})
|
||||
|
||||
|
||||
K_T = TypeVar('K_T')
|
||||
V_T = TypeVar('V_T')
|
||||
|
||||
|
||||
def _expect_value(
|
||||
key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
|
||||
) -> V_T:
|
||||
if key not in src:
|
||||
raise CompilationException(
|
||||
'Expected to find "{}" in cached "result.{}" based '
|
||||
'on cached file information: {}!'
|
||||
.format(key, name, old_file)
|
||||
)
|
||||
return src[key]
|
||||
@@ -22,11 +22,10 @@ class RPCBlock(FileBlock):
|
||||
|
||||
|
||||
class RPCCallParser(SimpleSQLParser[ParsedRPCNode]):
|
||||
def get_paths(self):
|
||||
return []
|
||||
|
||||
def parse_from_dict(self, dct, validate=True) -> ParsedRPCNode:
|
||||
return ParsedRPCNode.from_dict(dct, validate=validate)
|
||||
if validate:
|
||||
ParsedRPCNode.validate(dct)
|
||||
return ParsedRPCNode.from_dict(dct)
|
||||
|
||||
@property
|
||||
def resource_type(self) -> NodeType:
|
||||
|
||||
@@ -179,12 +179,13 @@ class TestBuilder(Generic[Testable]):
|
||||
- or it may not be namespaced (test)
|
||||
|
||||
"""
|
||||
# The 'test_name' is used to find the 'macro' that implements the test
|
||||
TEST_NAME_PATTERN = re.compile(
|
||||
r'((?P<test_namespace>([a-zA-Z_][0-9a-zA-Z_]*))\.)?'
|
||||
r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
|
||||
)
|
||||
# map magic keys to default values
|
||||
MODIFIER_ARGS = {'severity': 'ERROR', 'tags': []}
|
||||
# kwargs representing test configs
|
||||
MODIFIER_ARGS = ('severity', 'tags', 'enabled')
|
||||
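A quick standalone check of TEST_NAME_PATTERN above, showing how a namespaced test name splits into the package ('test_namespace') and the macro-facing name ('test_name'); the example names are invented:

```python
import re

# Copied from the hunk above.
TEST_NAME_PATTERN = re.compile(
    r'((?P<test_namespace>([a-zA-Z_][0-9a-zA-Z_]*))\.)?'
    r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
)

m = TEST_NAME_PATTERN.match('dbt_utils.equal_rowcount')
assert m.group('test_namespace') == 'dbt_utils'
assert m.group('test_name') == 'equal_rowcount'

m = TEST_NAME_PATTERN.match('unique')   # not namespaced
assert m.group('test_namespace') is None
assert m.group('test_name') == 'unique'
```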
|
||||
def __init__(
|
||||
self,
|
||||
@@ -216,11 +217,12 @@ class TestBuilder(Generic[Testable]):
|
||||
self.name: str = groups['test_name']
|
||||
self.namespace: str = groups['test_namespace']
|
||||
self.modifiers: Dict[str, Any] = {}
|
||||
for key, default in self.MODIFIER_ARGS.items():
|
||||
value = self.args.pop(key, default)
|
||||
for key in self.MODIFIER_ARGS:
|
||||
value = self.args.pop(key, None)
|
||||
if isinstance(value, str):
|
||||
value = get_rendered(value, render_ctx)
|
||||
self.modifiers[key] = value
|
||||
value = get_rendered(value, render_ctx, native=True)
|
||||
if value is not None:
|
||||
self.modifiers[key] = value
|
||||
|
||||
if self.namespace is not None:
|
||||
self.package_name = self.namespace
|
||||
@@ -266,8 +268,15 @@ class TestBuilder(Generic[Testable]):
|
||||
test_args['column_name'] = name
|
||||
return test_name, test_args
|
||||
|
||||
def severity(self) -> str:
|
||||
return self.modifiers.get('severity', 'ERROR').upper()
|
||||
def enabled(self) -> Optional[bool]:
|
||||
return self.modifiers.get('enabled')
|
||||
|
||||
def severity(self) -> Optional[str]:
|
||||
sev = self.modifiers.get('severity')
|
||||
if sev:
|
||||
return sev.upper()
|
||||
else:
|
||||
return None
|
||||
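Behaviour change in miniature (modifier dicts invented): when a test sets no severity, the old method defaulted to 'ERROR', while the new one returns None so that construct_config() can leave the key out entirely:

```python
modifiers = {}   # test declared no severity

old_severity = modifiers.get('severity', 'ERROR').upper()   # 'ERROR'

sev = modifiers.get('severity')                              # None
new_severity = sev.upper() if sev else None                  # stays None
```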
|
||||
def tags(self) -> List[str]:
|
||||
tags = self.modifiers.get('tags', [])
|
||||
@@ -302,13 +311,25 @@ class TestBuilder(Generic[Testable]):
|
||||
name = '{}_{}'.format(self.namespace, name)
|
||||
return get_nice_schema_test_name(name, self.target.name, self.args)
|
||||
|
||||
def construct_config(self) -> str:
|
||||
configs = ",".join([
|
||||
f"{key}=" + (f"'{value}'" if isinstance(value, str) else str(value))
|
||||
for key, value
|
||||
in self.modifiers.items()
|
||||
])
|
||||
if configs:
|
||||
return f"{{{{ config({configs}) }}}}"
|
||||
else:
|
||||
return ""
|
||||
|
||||
# this is the 'raw_sql' that's used in 'render_update' and execution
|
||||
# of the test macro
|
||||
def build_raw_sql(self) -> str:
|
||||
return (
|
||||
"{{{{ config(severity='{severity}') }}}}"
|
||||
"{{{{ {macro}(**{kwargs_name}) }}}}"
|
||||
"{config}{{{{ {macro}(**{kwargs_name}) }}}}"
|
||||
).format(
|
||||
macro=self.macro_name(),
|
||||
severity=self.severity(),
|
||||
config=self.construct_config(),
|
||||
kwargs_name=SCHEMA_TEST_KWARGS_NAME,
|
||||
)
|
||||
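A small standalone reproduction of the new construct_config() / build_raw_sql() pair above, with invented modifiers and macro name (the kwargs marker name is an assumption), showing the string that becomes the test node's raw_sql:

```python
# Invented inputs; the kwargs marker name is assumed for illustration.
modifiers = {'severity': 'WARN', 'tags': ['nightly']}
macro = 'test_unique'
kwargs_name = '_dbt_schema_test_kwargs'

configs = ",".join(
    f"{key}=" + (f"'{value}'" if isinstance(value, str) else str(value))
    for key, value in modifiers.items()
)
config = f"{{{{ config({configs}) }}}}" if configs else ""

raw_sql = "{config}{{{{ {macro}(**{kwargs_name}) }}}}".format(
    config=config, macro=macro, kwargs_name=kwargs_name,
)
# raw_sql:
# {{ config(severity='WARN',tags=['nightly']) }}{{ test_unique(**_dbt_schema_test_kwargs) }}
```

Unlike the old version, a test that sets no modifiers now gets no config() call baked into its raw_sql at all.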
|
||||
|
||||
@@ -6,9 +6,9 @@ from typing import (
|
||||
Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
|
||||
)
|
||||
|
||||
from hologram import ValidationError, JsonSchemaMixin
|
||||
from dbt.dataclass_schema import ValidationError, dbtClassMixin
|
||||
|
||||
from dbt.adapters.factory import get_adapter
|
||||
from dbt.adapters.factory import get_adapter, get_adapter_package_names
|
||||
from dbt.clients.jinja import get_rendered, add_rendered_test_kwargs
|
||||
from dbt.clients.yaml_helper import load_yaml_text
|
||||
from dbt.config.renderer import SchemaYamlRenderer
|
||||
@@ -20,7 +20,10 @@ from dbt.context.context_config import (
|
||||
)
|
||||
from dbt.context.configured import generate_schema_yml
|
||||
from dbt.context.target import generate_target_context
|
||||
from dbt.context.providers import generate_parse_exposure
|
||||
from dbt.context.providers import (
|
||||
generate_parse_exposure, generate_test_context
|
||||
)
|
||||
from dbt.context.macro_resolver import MacroResolver
|
||||
from dbt.contracts.files import FileHash
|
||||
from dbt.contracts.graph.manifest import SourceFile
|
||||
from dbt.contracts.graph.model_config import SourceConfig
|
||||
@@ -49,11 +52,11 @@ from dbt.contracts.graph.unparsed import (
|
||||
from dbt.exceptions import (
|
||||
validator_error_message, JSONValidationException,
|
||||
raise_invalid_schema_yml_version, ValidationException,
|
||||
CompilationException, warn_or_error, InternalException
|
||||
CompilationException, InternalException
|
||||
)
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SimpleParser
|
||||
from dbt.parser.search import FileBlock, FilesystemSearcher
|
||||
from dbt.parser.search import FileBlock
|
||||
from dbt.parser.schema_test_builders import (
|
||||
TestBuilder, SchemaTestBlock, TargetBlock, YamlBlock,
|
||||
TestBlock, Testable
|
||||
@@ -95,6 +98,7 @@ def error_context(
|
||||
|
||||
class ParserRef:
|
||||
"""A helper object to hold parse-time references."""
|
||||
|
||||
def __init__(self):
|
||||
self.column_info: Dict[str, ColumnInfo] = {}
|
||||
|
||||
@@ -154,9 +158,9 @@ def merge_freshness(
|
||||
|
||||
class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
def __init__(
|
||||
self, results, project, root_project, macro_manifest,
|
||||
self, project, manifest, root_project,
|
||||
) -> None:
|
||||
super().__init__(results, project, root_project, macro_manifest)
|
||||
super().__init__(project, manifest, root_project)
|
||||
all_v_2 = (
|
||||
self.root_project.config_version == 2 and
|
||||
self.project.config_version == 2
|
||||
@@ -172,6 +176,15 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
|
||||
self.raw_renderer = SchemaYamlRenderer(ctx)
|
||||
|
||||
internal_package_names = get_adapter_package_names(
|
||||
self.root_project.credentials.type
|
||||
)
|
||||
self.macro_resolver = MacroResolver(
|
||||
self.manifest.macros,
|
||||
self.root_project.project_name,
|
||||
internal_package_names
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_compiled_path(cls, block: FileBlock) -> str:
|
||||
# should this raise an error?
|
||||
@@ -181,29 +194,12 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
def resource_type(self) -> NodeType:
|
||||
return NodeType.Test
|
||||
|
||||
def get_paths(self):
|
||||
# TODO: In order to support this, make FilesystemSearcher accept a list
|
||||
# of file patterns. eg: ['.yml', '.yaml']
|
||||
yaml_files = list(FilesystemSearcher(
|
||||
self.project, self.project.all_source_paths, '.yaml'
|
||||
))
|
||||
if yaml_files:
|
||||
warn_or_error(
|
||||
'A future version of dbt will parse files with both'
|
||||
' .yml and .yaml file extensions. dbt found'
|
||||
f' {len(yaml_files)} files with .yaml extensions in'
|
||||
' your dbt project. To avoid errors when upgrading'
|
||||
' to a future release, either remove these files from'
|
||||
' your dbt project, or change their extensions.'
|
||||
)
|
||||
return FilesystemSearcher(
|
||||
self.project, self.project.all_source_paths, '.yml'
|
||||
)
|
||||
|
||||
def parse_from_dict(self, dct, validate=True) -> ParsedSchemaTestNode:
|
||||
return ParsedSchemaTestNode.from_dict(dct, validate=validate)
|
||||
if validate:
|
||||
ParsedSchemaTestNode.validate(dct)
|
||||
return ParsedSchemaTestNode.from_dict(dct)
|
||||
|
||||
def _parse_format_version(
|
||||
def _check_format_version(
|
||||
self, yaml: YamlBlock
|
||||
) -> None:
|
||||
path = yaml.path.relative_path
|
||||
@@ -269,6 +265,8 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
relation_cls = adapter.Relation
|
||||
return str(relation_cls.create_from(self.root_project, node))
|
||||
|
||||
# This converts an UnpatchedSourceDefinition to a ParsedSourceDefinition
|
||||
# it is used by the SourcePatcher.
|
||||
def parse_source(
|
||||
self, target: UnpatchedSourceDefinition
|
||||
) -> ParsedSourceDefinition:
|
||||
@@ -370,10 +368,11 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
'config': self.config_dict(config),
|
||||
'test_metadata': test_metadata,
|
||||
'column_name': column_name,
|
||||
'checksum': FileHash.empty().to_dict(),
|
||||
'checksum': FileHash.empty().to_dict(omit_none=True),
|
||||
}
|
||||
try:
|
||||
return self.parse_from_dict(dct)
|
||||
ParsedSchemaTestNode.validate(dct)
|
||||
return ParsedSchemaTestNode.from_dict(dct)
|
||||
except ValidationError as exc:
|
||||
msg = validator_error_message(exc)
|
||||
# this is a bit silly, but build an UnparsedNode just for error
|
||||
@@ -386,6 +385,7 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
)
|
||||
raise CompilationException(msg, node=node) from exc
|
||||
|
||||
# lots of time spent in this method
|
||||
def _parse_generic_test(
|
||||
self,
|
||||
target: Testable,
|
||||
@@ -424,6 +424,7 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
# is not necessarily this package's name
|
||||
fqn = self.get_fqn(fqn_path, builder.fqn_name)
|
||||
|
||||
# this is the config that is used in render_update
|
||||
config = self.initial_config(fqn)
|
||||
|
||||
metadata = {
|
||||
@@ -446,9 +447,56 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
column_name=column_name,
|
||||
test_metadata=metadata,
|
||||
)
|
||||
self.render_update(node, config)
|
||||
self.render_test_update(node, config, builder)
|
||||
|
||||
return node
|
||||
|
||||
# This does special shortcut processing for the two
|
||||
# most common internal macros, not_null and unique,
|
||||
# which avoids the jinja rendering to resolve config
|
||||
# and variables, etc, which might be in the macro.
|
||||
# In the future we will look at generalizing this
|
||||
# more to handle additional macros or to use static
|
||||
# parsing to avoid jinja overhead.
|
||||
def render_test_update(self, node, config, builder):
|
||||
macro_unique_id = self.macro_resolver.get_macro_id(
|
||||
node.package_name, 'test_' + builder.name)
|
||||
# Add the depends_on here so we can limit the macros added
|
||||
# to the context in rendering processing
|
||||
node.depends_on.add_macro(macro_unique_id)
|
||||
if (macro_unique_id in
|
||||
['macro.dbt.test_not_null', 'macro.dbt.test_unique']):
|
||||
self.update_parsed_node(node, config)
|
||||
if builder.severity() is not None:
|
||||
node.unrendered_config['severity'] = builder.severity()
|
||||
node.config['severity'] = builder.severity()
|
||||
if builder.enabled() is not None:
|
||||
node.config['enabled'] = builder.enabled()
|
||||
# source node tests are processed at patch_source time
|
||||
if isinstance(builder.target, UnpatchedSourceDefinition):
|
||||
sources = [builder.target.fqn[-2], builder.target.fqn[-1]]
|
||||
node.sources.append(sources)
|
||||
else: # all other nodes
|
||||
node.refs.append([builder.target.name])
|
||||
else:
|
||||
try:
|
||||
# make a base context that doesn't have the magic kwargs field
|
||||
context = generate_test_context(
|
||||
node, self.root_project, self.manifest, config,
|
||||
self.macro_resolver,
|
||||
)
|
||||
# update with rendered test kwargs (which collects any refs)
|
||||
add_rendered_test_kwargs(context, node, capture_macros=True)
|
||||
# the parsed node is not rendered in the native context.
|
||||
get_rendered(
|
||||
node.raw_sql, context, node, capture_macros=True
|
||||
)
|
||||
self.update_parsed_node(node, config)
|
||||
except ValidationError as exc:
|
||||
# we got a ValidationError - probably bad types in config()
|
||||
msg = validator_error_message(exc)
|
||||
raise CompilationException(msg, node=node) from exc
|
||||
|
||||
def parse_source_test(
|
||||
self,
|
||||
target: UnpatchedSourceDefinition,
|
||||
@@ -480,9 +528,9 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
)
|
||||
# we can't go through result.add_node - no file... instead!
|
||||
if node.config.enabled:
|
||||
self.results.add_node_nofile(node)
|
||||
self.manifest.add_node_nofile(node)
|
||||
else:
|
||||
self.results.add_disabled_nofile(node)
|
||||
self.manifest.add_disabled_nofile(node)
|
||||
return node
|
||||
|
||||
def parse_node(self, block: SchemaTestBlock) -> ParsedSchemaTestNode:
|
||||
@@ -556,14 +604,17 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
def parse_exposures(self, block: YamlBlock) -> None:
|
||||
parser = ExposureParser(self, block)
|
||||
for node in parser.parse():
|
||||
self.results.add_exposure(block.file, node)
|
||||
self.manifest.add_exposure(block.file, node)
|
||||
|
||||
def parse_file(self, block: FileBlock) -> None:
|
||||
dct = self._yaml_from_file(block.file)
|
||||
# mark the file as seen, even if there are no macros in it
|
||||
self.results.get_file(block.file)
|
||||
|
||||
# mark the file as seen, in Manifest.files
|
||||
self.manifest.get_file(block.file)
|
||||
|
||||
if dct:
|
||||
try:
|
||||
# This does a deep_map to check for circular references
|
||||
dct = self.raw_renderer.render_data(dct)
|
||||
except CompilationException as exc:
|
||||
raise CompilationException(
|
||||
@@ -571,24 +622,58 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
|
||||
f'project {self.project.project_name}: {exc}'
|
||||
) from exc
|
||||
|
||||
# contains the FileBlock and the data (dictionary)
|
||||
yaml_block = YamlBlock.from_file_block(block, dct)
|
||||
|
||||
self._parse_format_version(yaml_block)
|
||||
# checks version
|
||||
self._check_format_version(yaml_block)
|
||||
|
||||
parser: YamlDocsReader
|
||||
for key in NodeType.documentable():
|
||||
plural = key.pluralize()
|
||||
if key == NodeType.Source:
|
||||
parser = SourceParser(self, yaml_block, plural)
|
||||
elif key == NodeType.Macro:
|
||||
parser = MacroPatchParser(self, yaml_block, plural)
|
||||
elif key == NodeType.Analysis:
|
||||
parser = AnalysisPatchParser(self, yaml_block, plural)
|
||||
else:
|
||||
parser = TestablePatchParser(self, yaml_block, plural)
|
||||
|
||||
# There are 7 kinds of parsers:
|
||||
# Model, Seed, Snapshot, Source, Macro, Analysis, Exposures
|
||||
|
||||
# NonSourceParser.parse(), TestablePatchParser is a variety of
|
||||
# NodePatchParser
|
||||
if 'models' in dct:
|
||||
parser = TestablePatchParser(self, yaml_block, 'models')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
self.parse_exposures(yaml_block)
|
||||
|
||||
# NonSourceParser.parse()
|
||||
if 'seeds' in dct:
|
||||
parser = TestablePatchParser(self, yaml_block, 'seeds')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
|
||||
# NonSourceParser.parse()
|
||||
if 'snapshots' in dct:
|
||||
parser = TestablePatchParser(self, yaml_block, 'snapshots')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
|
||||
# This parser uses SourceParser.parse() which doesn't return
|
||||
# any test blocks. Source tests are handled at a later point
|
||||
# in the process.
|
||||
if 'sources' in dct:
|
||||
parser = SourceParser(self, yaml_block, 'sources')
|
||||
parser.parse()
|
||||
|
||||
# NonSourceParser.parse()
|
||||
if 'macros' in dct:
|
||||
parser = MacroPatchParser(self, yaml_block, 'macros')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
|
||||
# NonSourceParser.parse()
|
||||
if 'analyses' in dct:
|
||||
parser = AnalysisPatchParser(self, yaml_block, 'analyses')
|
||||
for test_block in parser.parse():
|
||||
self.parse_tests(test_block)
|
||||
|
||||
# parse exposures
|
||||
if 'exposures' in dct:
|
||||
self.parse_exposures(yaml_block)
|
||||
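The if-blocks above all follow one pattern: pick a parser per top-level yaml key, then feed any returned test blocks to parse_tests(). A toy, self-contained condensation of that dispatch (stub classes and yaml dict invented, not dbt's code) looks like this:

```python
# Toy stand-in for parse_file's per-key dispatch.
class StubParser:
    def __init__(self, key):
        self.key = key

    def parse(self):
        # real parsers return TestBlocks; SourceParser returns none
        return [] if self.key == 'sources' else [f'{self.key}-test-block']


dct = {'models': [{'name': 'customers'}], 'sources': [{'name': 'raw'}]}

for key in ('models', 'seeds', 'snapshots', 'sources', 'macros', 'analyses'):
    if key not in dct:
        continue
    for test_block in StubParser(key).parse():
        print('parse_tests would run for', test_block)

if 'exposures' in dct:
    print('parse_exposures would run')
```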
|
||||
|
||||
Parsed = TypeVar(
|
||||
@@ -605,17 +690,20 @@ NonSourceTarget = TypeVar(
|
||||
)
|
||||
|
||||
|
||||
# abstract base class (ABCMeta)
|
||||
class YamlReader(metaclass=ABCMeta):
|
||||
def __init__(
|
||||
self, schema_parser: SchemaParser, yaml: YamlBlock, key: str
|
||||
) -> None:
|
||||
self.schema_parser = schema_parser
|
||||
# key: models, seeds, snapshots, sources, macros,
|
||||
# analyses, exposures
|
||||
self.key = key
|
||||
self.yaml = yaml
|
||||
|
||||
@property
|
||||
def results(self):
|
||||
return self.schema_parser.results
|
||||
def manifest(self):
|
||||
return self.schema_parser.manifest
|
||||
|
||||
@property
|
||||
def project(self):
|
||||
@@ -629,6 +717,9 @@ class YamlReader(metaclass=ABCMeta):
|
||||
def root_project(self):
|
||||
return self.schema_parser.root_project
|
||||
|
||||
# for the different schema subparsers ('models', 'source', etc)
|
||||
# get the list of dicts pointed to by the key in the yaml config,
|
||||
# ensure that the dicts have string keys
|
||||
def get_key_dicts(self) -> Iterable[Dict[str, Any]]:
|
||||
data = self.yaml.data.get(self.key, [])
|
||||
if not isinstance(data, list):
|
||||
@@ -638,7 +729,10 @@ class YamlReader(metaclass=ABCMeta):
|
||||
)
|
||||
path = self.yaml.path.original_file_path
|
||||
|
||||
# for each dict in the data (which is a list of dicts)
|
||||
for entry in data:
|
||||
# check that entry is a dict and that all dict values
|
||||
# are strings
|
||||
if coerce_dict_str(entry) is not None:
|
||||
yield entry
|
||||
else:
|
||||
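To make get_key_dicts() concrete: given a schema.yml parsed into yaml.data like the (invented) dict below, a reader whose key is 'models' yields the two entries under 'models', each checked to be a dict with string keys:

```python
# Invented schema.yml contents, already parsed into a Python dict.
yaml_data = {
    'version': 2,
    'models': [
        {'name': 'customers', 'description': 'one row per customer'},
        {'name': 'orders', 'columns': [{'name': 'order_id', 'tests': ['unique']}]},
    ],
}

models_entries = yaml_data.get('models', [])
# get_key_dicts() would yield these two dicts, one at a time.
```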
@@ -654,19 +748,27 @@ class YamlDocsReader(YamlReader):
|
||||
raise NotImplementedError('parse is abstract')
|
||||
|
||||
|
||||
T = TypeVar('T', bound=JsonSchemaMixin)
|
||||
T = TypeVar('T', bound=dbtClassMixin)
|
||||
|
||||
|
||||
# This parses the 'sources' keys in yaml files.
|
||||
class SourceParser(YamlDocsReader):
|
||||
def _target_from_dict(self, cls: Type[T], data: Dict[str, Any]) -> T:
|
||||
path = self.yaml.path.original_file_path
|
||||
try:
|
||||
cls.validate(data)
|
||||
return cls.from_dict(data)
|
||||
except (ValidationError, JSONValidationException) as exc:
|
||||
msg = error_context(path, self.key, data, exc)
|
||||
raise CompilationException(msg) from exc
|
||||
|
||||
# The other parse method returns TestBlocks. This one doesn't.
|
||||
# This takes the yaml dictionaries in 'sources' keys and uses them
|
||||
# to create UnparsedSourceDefinition objects. They are then turned
|
||||
# into UnpatchedSourceDefinition objects in 'add_source_definitions'
|
||||
# or SourcePatch objects in 'add_source_patch'
|
||||
def parse(self) -> List[TestBlock]:
|
||||
# get a verified list of dicts for the key handled by this parser
|
||||
for data in self.get_key_dicts():
|
||||
data = self.project.credentials.translate_aliases(
|
||||
data, recurse=True
|
||||
@@ -676,7 +778,7 @@ class SourceParser(YamlDocsReader):
|
||||
if is_override:
|
||||
data['path'] = self.yaml.path.original_file_path
|
||||
patch = self._target_from_dict(SourcePatch, data)
|
||||
self.results.add_source_patch(self.yaml.file, patch)
|
||||
self.manifest.add_source_patch(self.yaml.file, patch)
|
||||
else:
|
||||
source = self._target_from_dict(UnparsedSourceDefinition, data)
|
||||
self.add_source_definitions(source)
|
||||
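For reference, the two shapes this loop distinguishes arrive as dicts like the following (names invented): an entry carrying an 'overrides' key becomes a SourcePatch, anything else becomes an UnparsedSourceDefinition:

```python
# Invented 'sources:' entries as parse() receives them from get_key_dicts().
source_definition = {
    'name': 'raw_payments',
    'schema': 'raw',
    'tables': [{'name': 'orders'}, {'name': 'customers'}],
}

source_override = {
    'name': 'raw_payments',
    'overrides': 'some_upstream_package',   # 'overrides' present -> SourcePatch
    'tables': [{'name': 'orders', 'description': 'patched description'}],
}
```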
@@ -706,13 +808,15 @@ class SourceParser(YamlDocsReader):
|
||||
resource_type=NodeType.Source,
|
||||
fqn=fqn,
|
||||
)
|
||||
self.results.add_source(self.yaml.file, result)
|
||||
self.manifest.add_source(self.yaml.file, result)
|
||||
|
||||
|
||||
# This class has three main subclasses: TestablePatchParser (models,
|
||||
# seeds, snapshots), MacroPatchParser, and AnalysisPatchParser
|
||||
class NonSourceParser(YamlDocsReader, Generic[NonSourceTarget, Parsed]):
|
||||
@abstractmethod
|
||||
def _target_type(self) -> Type[NonSourceTarget]:
|
||||
raise NotImplementedError('_unsafe_from_dict not implemented')
|
||||
raise NotImplementedError('_target_type not implemented')
|
||||
|
||||
@abstractmethod
|
||||
def get_block(self, node: NonSourceTarget) -> TargetBlock:
|
||||
@@ -727,33 +831,55 @@ class NonSourceParser(YamlDocsReader, Generic[NonSourceTarget, Parsed]):
|
||||
def parse(self) -> List[TestBlock]:
|
||||
node: NonSourceTarget
|
||||
test_blocks: List[TestBlock] = []
|
||||
# get list of 'node' objects
|
||||
# UnparsedNodeUpdate (TestablePatchParser, models, seeds, snapshots)
|
||||
# = HasColumnTests, HasTests
|
||||
# UnparsedAnalysisUpdate (UnparsedAnalysisParser, analyses)
|
||||
# = HasColumnDocs, HasDocs
|
||||
# UnparsedMacroUpdate (MacroPatchParser, 'macros')
|
||||
# = HasDocs
|
||||
# correspond to this parser's 'key'
|
||||
for node in self.get_unparsed_target():
|
||||
# node_block is a TargetBlock (Macro or Analysis)
|
||||
# or a TestBlock (all of the others)
|
||||
node_block = self.get_block(node)
|
||||
if isinstance(node_block, TestBlock):
|
||||
# TestablePatchParser = models, seeds, snapshots
|
||||
test_blocks.append(node_block)
|
||||
if isinstance(node, (HasColumnDocs, HasColumnTests)):
|
||||
# UnparsedNodeUpdate and UnparsedAnalysisUpdate
|
||||
refs: ParserRef = ParserRef.from_target(node)
|
||||
else:
|
||||
refs = ParserRef()
|
||||
# This adds the node_block to self.manifest
|
||||
# as a ParsedNodePatch or ParsedMacroPatch
|
||||
self.parse_patch(node_block, refs)
|
||||
return test_blocks
|
||||
|
||||
def get_unparsed_target(self) -> Iterable[NonSourceTarget]:
|
||||
path = self.yaml.path.original_file_path
|
||||
|
||||
for data in self.get_key_dicts():
|
||||
# get verified list of dicts for the 'key' that this
|
||||
# parser handles
|
||||
key_dicts = self.get_key_dicts()
|
||||
for data in key_dicts:
|
||||
# add extra data to each dict. This updates the dicts
|
||||
# in the parser yaml
|
||||
data.update({
|
||||
'original_file_path': path,
|
||||
'yaml_key': self.key,
|
||||
'package_name': self.project.project_name,
|
||||
})
|
||||
try:
|
||||
model = self._target_type().from_dict(data)
|
||||
# target_type: UnparsedNodeUpdate, UnparsedAnalysisUpdate,
|
||||
# or UnparsedMacroUpdate
|
||||
self._target_type().validate(data)
|
||||
node = self._target_type().from_dict(data)
|
||||
except (ValidationError, JSONValidationException) as exc:
|
||||
msg = error_context(path, self.key, data, exc)
|
||||
raise CompilationException(msg) from exc
|
||||
else:
|
||||
yield model
|
||||
yield node
|
||||
|
||||
|
||||
class NodePatchParser(
|
||||
@@ -763,7 +889,7 @@ class NodePatchParser(
|
||||
def parse_patch(
|
||||
self, block: TargetBlock[NodeTarget], refs: ParserRef
|
||||
) -> None:
|
||||
result = ParsedNodePatch(
|
||||
patch = ParsedNodePatch(
|
||||
name=block.target.name,
|
||||
original_file_path=block.target.original_file_path,
|
||||
yaml_key=block.target.yaml_key,
|
||||
@@ -773,7 +899,7 @@ class NodePatchParser(
|
||||
meta=block.target.meta,
|
||||
docs=block.target.docs,
|
||||
)
|
||||
self.results.add_patch(self.yaml.file, result)
|
||||
self.manifest.add_patch(self.yaml.file, patch)
|
||||
|
||||
|
||||
class TestablePatchParser(NodePatchParser[UnparsedNodeUpdate]):
|
||||
@@ -802,7 +928,7 @@ class MacroPatchParser(NonSourceParser[UnparsedMacroUpdate, ParsedMacroPatch]):
|
||||
def parse_patch(
|
||||
self, block: TargetBlock[UnparsedMacroUpdate], refs: ParserRef
|
||||
) -> None:
|
||||
result = ParsedMacroPatch(
|
||||
patch = ParsedMacroPatch(
|
||||
name=block.target.name,
|
||||
original_file_path=block.target.original_file_path,
|
||||
yaml_key=block.target.yaml_key,
|
||||
@@ -812,7 +938,7 @@ class MacroPatchParser(NonSourceParser[UnparsedMacroUpdate, ParsedMacroPatch]):
|
||||
meta=block.target.meta,
|
||||
docs=block.target.docs,
|
||||
)
|
||||
self.results.add_macro_patch(self.yaml.file, result)
|
||||
self.manifest.add_macro_patch(self.yaml.file, patch)
|
||||
|
||||
|
||||
class ExposureParser(YamlReader):
|
||||
@@ -846,7 +972,7 @@ class ExposureParser(YamlReader):
|
||||
ctx = generate_parse_exposure(
|
||||
parsed,
|
||||
self.root_project,
|
||||
self.schema_parser.macro_manifest,
|
||||
self.schema_parser.manifest,
|
||||
package_name,
|
||||
)
|
||||
depends_on_jinja = '\n'.join(
|
||||
@@ -861,6 +987,7 @@ class ExposureParser(YamlReader):
|
||||
def parse(self) -> Iterable[ParsedExposure]:
|
||||
for data in self.get_key_dicts():
|
||||
try:
|
||||
UnparsedExposure.validate(data)
|
||||
unparsed = UnparsedExposure.from_dict(data)
|
||||
except (ValidationError, JSONValidationException) as exc:
|
||||
msg = error_context(self.yaml.path, self.key, data, exc)
|
||||
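An (invented) 'exposures:' entry as UnparsedExposure.from_dict() would receive it; the strings in depends_on are the jinja expressions that get joined into depends_on_jinja in the hunk above:

```python
exposure_data = {
    'name': 'weekly_revenue_dashboard',
    'type': 'dashboard',
    'owner': {'email': 'analytics@example.com'},
    'depends_on': ["ref('fct_orders')", "ref('dim_customers')"],
}
```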
|
||||
@@ -11,6 +11,8 @@ from dbt.contracts.files import SourceFile, FilePath
|
||||
from dbt.exceptions import CompilationException, InternalException
|
||||
|
||||
|
||||
# What's the point of wrapping a SourceFile with this class?
|
||||
# Could it be removed?
|
||||
@dataclass
|
||||
class FileBlock:
|
||||
file: SourceFile
|
||||
@@ -30,6 +32,9 @@ class FileBlock:
|
||||
return self.file.path
|
||||
|
||||
|
||||
# The BlockTag is used in Jinja processing
|
||||
# Why do we have different classes where the only
|
||||
# difference is what 'contents' returns?
|
||||
@dataclass
|
||||
class BlockContents(FileBlock):
|
||||
file: SourceFile # if you remove this, mypy will get upset
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
from dbt.context.context_config import ContextConfig
|
||||
from dbt.contracts.files import SourceFile, FilePath
|
||||
from dbt.contracts.graph.parsed import ParsedSeedNode
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SimpleSQLParser
|
||||
from dbt.parser.search import FileBlock, FilesystemSearcher
|
||||
from dbt.parser.search import FileBlock
|
||||
|
||||
|
||||
class SeedParser(SimpleSQLParser[ParsedSeedNode]):
|
||||
def get_paths(self):
|
||||
return FilesystemSearcher(
|
||||
self.project, self.project.data_paths, '.csv'
|
||||
)
|
||||
|
||||
def parse_from_dict(self, dct, validate=True) -> ParsedSeedNode:
|
||||
return ParsedSeedNode.from_dict(dct, validate=validate)
|
||||
if validate:
|
||||
ParsedSeedNode.validate(dct)
|
||||
return ParsedSeedNode.from_dict(dct)
|
||||
|
||||
@property
|
||||
def resource_type(self) -> NodeType:
|
||||
@@ -27,13 +23,3 @@ class SeedParser(SimpleSQLParser[ParsedSeedNode]):
|
||||
self, parsed_node: ParsedSeedNode, config: ContextConfig
|
||||
) -> None:
|
||||
"""Seeds don't need to do any rendering."""
|
||||
|
||||
def load_file(
|
||||
self, match: FilePath, *, set_contents: bool = False
|
||||
) -> SourceFile:
|
||||
if match.seed_too_large():
|
||||
# We don't want to calculate a hash of this file. Use the path.
|
||||
return SourceFile.big_seed(match)
|
||||
else:
|
||||
# We want to calculate a hash, but we don't need the contents
|
||||
return super().load_file(match, set_contents=set_contents)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
from hologram import ValidationError
|
||||
from dbt.dataclass_schema import ValidationError
|
||||
|
||||
from dbt.contracts.graph.parsed import (
|
||||
IntermediateSnapshotNode, ParsedSnapshotNode
|
||||
@@ -12,7 +12,7 @@ from dbt.exceptions import (
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SQLParser
|
||||
from dbt.parser.search import (
|
||||
FilesystemSearcher, BlockContents, BlockSearcher, FileBlock
|
||||
BlockContents, BlockSearcher, FileBlock
|
||||
)
|
||||
from dbt.utils import split_path
|
||||
|
||||
@@ -20,13 +20,10 @@ from dbt.utils import split_path
|
||||
class SnapshotParser(
|
||||
SQLParser[IntermediateSnapshotNode, ParsedSnapshotNode]
|
||||
):
|
||||
def get_paths(self):
|
||||
return FilesystemSearcher(
|
||||
self.project, self.project.snapshot_paths, '.sql'
|
||||
)
|
||||
|
||||
def parse_from_dict(self, dct, validate=True) -> IntermediateSnapshotNode:
|
||||
return IntermediateSnapshotNode.from_dict(dct, validate=validate)
|
||||
if validate:
|
||||
IntermediateSnapshotNode.validate(dct)
|
||||
return IntermediateSnapshotNode.from_dict(dct)
|
||||
|
||||
@property
|
||||
def resource_type(self) -> NodeType:
|
||||
@@ -66,7 +63,8 @@ class SnapshotParser(
|
||||
|
||||
def transform(self, node: IntermediateSnapshotNode) -> ParsedSnapshotNode:
|
||||
try:
|
||||
parsed_node = ParsedSnapshotNode.from_dict(node.to_dict())
|
||||
dct = node.to_dict(omit_none=True)
|
||||
parsed_node = ParsedSnapshotNode.from_dict(dct)
|
||||
self.set_snapshot_attributes(parsed_node)
|
||||
return parsed_node
|
||||
except ValidationError as exc:
|
||||
@@ -83,4 +81,4 @@ class SnapshotParser(
|
||||
# in case there are no snapshots declared, we still want to mark this
|
||||
# file as seen. But after we've finished, because we don't want to add
|
||||
# files with syntax errors
|
||||
self.results.get_file(file_block.file)
|
||||
self.manifest.get_file(file_block.file)
|
||||
|
||||
@@ -4,6 +4,7 @@ from typing import (
|
||||
Dict,
|
||||
Optional,
|
||||
Set,
|
||||
Union,
|
||||
)
|
||||
from dbt.config import RuntimeConfig
|
||||
from dbt.contracts.graph.manifest import Manifest, SourceKey
|
||||
@@ -21,22 +22,17 @@ from dbt.contracts.graph.unparsed import (
|
||||
from dbt.exceptions import warn_or_error
|
||||
|
||||
from dbt.parser.schemas import SchemaParser, ParserRef
|
||||
from dbt.parser.results import ParseResult
|
||||
from dbt import ui
|
||||
|
||||
|
||||
class SourcePatcher:
|
||||
def __init__(
|
||||
self,
|
||||
results: ParseResult,
|
||||
root_project: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
) -> None:
|
||||
self.results = results
|
||||
self.root_project = root_project
|
||||
self.macro_manifest = Manifest.from_macros(
|
||||
macros=self.results.macros,
|
||||
files=self.results.files
|
||||
)
|
||||
self.manifest = manifest
|
||||
self.schema_parsers: Dict[str, SchemaParser] = {}
|
||||
self.patches_used: Dict[SourceKey, Set[str]] = {}
|
||||
self.sources: Dict[str, ParsedSourceDefinition] = {}
|
||||
@@ -49,8 +45,8 @@ class SourcePatcher:
|
||||
if patch is None:
|
||||
return unpatched
|
||||
|
||||
source_dct = unpatched.source.to_dict()
|
||||
table_dct = unpatched.table.to_dict()
|
||||
source_dct = unpatched.source.to_dict(omit_none=True)
|
||||
table_dct = unpatched.table.to_dict(omit_none=True)
|
||||
patch_path: Optional[Path] = None
|
||||
|
||||
source_table_patch: Optional[SourceTablePatch] = None
|
||||
@@ -85,7 +81,7 @@ class SourcePatcher:
|
||||
all_projects = self.root_project.load_dependencies()
|
||||
project = all_projects[package_name]
|
||||
schema_parser = SchemaParser(
|
||||
self.results, project, self.root_project, self.macro_manifest
|
||||
project, self.manifest, self.root_project
|
||||
)
|
||||
self.schema_parsers[package_name] = schema_parser
|
||||
return schema_parser
|
||||
@@ -103,10 +99,12 @@ class SourcePatcher:
|
||||
|
||||
def get_patch_for(
|
||||
self,
|
||||
unpatched: UnpatchedSourceDefinition,
|
||||
unpatched: Union[UnpatchedSourceDefinition, ParsedSourceDefinition],
|
||||
) -> Optional[SourcePatch]:
|
||||
if isinstance(unpatched, ParsedSourceDefinition):
|
||||
return None
|
||||
key = (unpatched.package_name, unpatched.source.name)
|
||||
patch: Optional[SourcePatch] = self.results.source_patches.get(key)
|
||||
patch: Optional[SourcePatch] = self.manifest.source_patches.get(key)
|
||||
if patch is None:
|
||||
return None
|
||||
if key not in self.patches_used:
|
||||
@@ -119,7 +117,9 @@ class SourcePatcher:
|
||||
def construct_sources(self) -> None:
|
||||
# given the UnpatchedSourceDefinition and SourcePatches, combine them
|
||||
# to make a beautiful baby ParsedSourceDefinition.
|
||||
for unique_id, unpatched in self.results.sources.items():
|
||||
for unique_id, unpatched in self.manifest.sources.items():
|
||||
if isinstance(unpatched, ParsedSourceDefinition):
|
||||
continue
|
||||
patch = self.get_patch_for(unpatched)
|
||||
|
||||
patched = self.patch_source(unpatched, patch)
|
||||
@@ -127,22 +127,22 @@ class SourcePatcher:
|
||||
# data.
|
||||
for test in self.get_source_tests(patched):
|
||||
if test.config.enabled:
|
||||
self.results.add_node_nofile(test)
|
||||
self.manifest.add_node_nofile(test)
|
||||
else:
|
||||
self.results.add_disabled_nofile(test)
|
||||
self.manifest.add_disabled_nofile(test)
|
||||
|
||||
schema_parser = self.get_schema_parser_for(unpatched.package_name)
|
||||
parsed = schema_parser.parse_source(patched)
|
||||
if parsed.config.enabled:
|
||||
self.sources[unique_id] = parsed
|
||||
else:
|
||||
self.results.add_disabled_nofile(parsed)
|
||||
self.manifest.add_disabled_nofile(parsed)
|
||||
|
||||
self.warn_unused()
|
||||
|
||||
def warn_unused(self) -> None:
|
||||
unused_tables: Dict[SourceKey, Optional[Set[str]]] = {}
|
||||
for patch in self.results.source_patches.values():
|
||||
for patch in self.manifest.source_patches.values():
|
||||
key = (patch.overrides, patch.name)
|
||||
if key not in self.patches_used:
|
||||
unused_tables[key] = None
|
||||
@@ -168,7 +168,7 @@ class SourcePatcher:
|
||||
'target:',
|
||||
]
|
||||
for key, table_names in unused_tables.items():
|
||||
patch = self.results.source_patches[key]
|
||||
patch = self.manifest.source_patches[key]
|
||||
patch_name = f'{patch.overrides}.{patch.name}'
|
||||
if table_names is None:
|
||||
msg.append(
|
||||
@@ -185,8 +185,8 @@ class SourcePatcher:
|
||||
|
||||
|
||||
def patch_sources(
|
||||
results: ParseResult,
|
||||
root_project: RuntimeConfig,
|
||||
manifest: Manifest,
|
||||
) -> Dict[str, ParsedSourceDefinition]:
|
||||
"""Patch all the sources found in the results. Updates results.disabled and
|
||||
results.nodes.
|
||||
@@ -194,6 +194,6 @@ def patch_sources(
|
||||
Return a dict of ParsedSourceDefinitions, suitable for use in
|
||||
manifest.sources.
|
||||
"""
|
||||
patcher = SourcePatcher(results, root_project)
|
||||
patcher = SourcePatcher(root_project, manifest)
|
||||
patcher.construct_sources()
|
||||
return patcher.sources
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
"""A collection of performance-enhancing functions that have to know just a
|
||||
little bit too much to go anywhere else.
|
||||
"""
|
||||
from dbt.adapters.factory import get_adapter
|
||||
from dbt.parser.manifest import load_manifest
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.config import RuntimeConfig
|
||||
|
||||
|
||||
def get_full_manifest(
|
||||
config: RuntimeConfig,
|
||||
*,
|
||||
reset: bool = False,
|
||||
) -> Manifest:
|
||||
"""Load the full manifest, using the adapter's internal manifest if it
|
||||
exists to skip parsing internal (dbt + plugins) macros a second time.
|
||||
|
||||
Also, make sure that we force-load the adapter's manifest, so it gets
|
||||
attached to the adapter for any methods that need it.
|
||||
"""
|
||||
adapter = get_adapter(config) # type: ignore
|
||||
if reset:
|
||||
config.clear_dependencies()
|
||||
adapter.clear_macro_manifest()
|
||||
|
||||
internal: Manifest = adapter.load_macro_manifest()
|
||||
|
||||
return load_manifest(
|
||||
config,
|
||||
internal,
|
||||
adapter.connections.set_query_header,
|
||||
)
|
||||
@@ -177,7 +177,7 @@ def poll_complete(
|
||||
|
||||
|
||||
def _dict_logs(logs: List[LogMessage]) -> List[Dict[str, Any]]:
|
||||
return [log.to_dict() for log in logs]
|
||||
return [log.to_dict(omit_none=True) for log in logs]
|
||||
|
||||
|
||||
class Poll(RemoteBuiltinMethod[PollParameters, PollResult]):
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import logbook
|
||||
import logbook.queues
|
||||
from jsonrpc.exceptions import JSONRPCError
|
||||
from hologram import JsonSchemaMixin
|
||||
from hologram.helpers import StrEnum
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
@@ -25,8 +24,11 @@ class QueueMessageType(StrEnum):
|
||||
terminating = frozenset((Error, Result, Timeout))
|
||||
|
||||
|
||||
# This class was subclassed from JsonSchemaMixin, but it
|
||||
# doesn't appear to be necessary, and Mashumaro does not
|
||||
# handle logbook.LogRecord
|
||||
@dataclass
|
||||
class QueueMessage(JsonSchemaMixin):
|
||||
class QueueMessage:
|
||||
message_type: QueueMessageType
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import List, Optional, Type, TypeVar, Generic, Dict, Any
|
||||
|
||||
from hologram import JsonSchemaMixin, ValidationError
|
||||
from dbt.dataclass_schema import dbtClassMixin, ValidationError
|
||||
|
||||
from dbt.contracts.rpc import RPCParameters, RemoteResult, RemoteMethodFlags
|
||||
from dbt.exceptions import NotImplementedException, InternalException
|
||||
@@ -109,7 +109,7 @@ class RemoteBuiltinMethod(RemoteMethod[Parameters, Result]):
|
||||
'the run() method on builtins should never be called'
|
||||
)
|
||||
|
||||
def __call__(self, **kwargs: Dict[str, Any]) -> JsonSchemaMixin:
|
||||
def __call__(self, **kwargs: Dict[str, Any]) -> dbtClassMixin:
|
||||
try:
|
||||
params = self.get_parameters().from_dict(kwargs)
|
||||
except ValidationError as exc:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import json
|
||||
from typing import Callable, Dict, Any
|
||||
|
||||
from hologram import JsonSchemaMixin
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
from jsonrpc.exceptions import (
|
||||
JSONRPCParseError,
|
||||
JSONRPCInvalidRequestException,
|
||||
@@ -90,11 +90,14 @@ class ResponseManager(JSONRPCResponseManager):
|
||||
@classmethod
|
||||
def _get_responses(cls, requests, dispatcher):
|
||||
for output in super()._get_responses(requests, dispatcher):
|
||||
# if it's a result, check if it's a JsonSchemaMixin and if so call
|
||||
# if it's a result, check if it's a dbtClassMixin and if so call
|
||||
# to_dict
|
||||
if hasattr(output, 'result'):
|
||||
if isinstance(output.result, JsonSchemaMixin):
|
||||
output.result = output.result.to_dict(omit_none=False)
|
||||
if isinstance(output.result, dbtClassMixin):
|
||||
# Note: errors in to_dict do not show up anywhere in
|
||||
# the output and all you get is a generic 500 error
|
||||
output.result = \
|
||||
output.result.to_dict(omit_none=False)
|
||||
yield output
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import (
|
||||
)
|
||||
from typing_extensions import Protocol
|
||||
|
||||
from hologram import JsonSchemaMixin, ValidationError
|
||||
from dbt.dataclass_schema import dbtClassMixin, ValidationError
|
||||
|
||||
import dbt.exceptions
|
||||
import dbt.flags
|
||||
@@ -187,6 +187,7 @@ def get_results_context(
|
||||
|
||||
class StateHandler:
|
||||
"""A helper context manager to manage task handler state."""
|
||||
|
||||
def __init__(self, task_handler: 'RequestTaskHandler') -> None:
|
||||
self.handler = task_handler
|
||||
|
||||
@@ -248,6 +249,7 @@ class SetArgsStateHandler(StateHandler):
|
||||
"""A state handler that does not touch state on success and does not
|
||||
execute the teardown
|
||||
"""
|
||||
|
||||
def handle_completed(self):
|
||||
pass
|
||||
|
||||
@@ -257,6 +259,7 @@ class SetArgsStateHandler(StateHandler):
|
||||
|
||||
class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
|
||||
"""Handler for the single task triggered by a given jsonrpc request."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
manager: TaskManagerProtocol,
|
||||
@@ -280,7 +283,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
|
||||
# - The actual thread that this represents, which writes its data to
|
||||
# the result and logs. The atomicity of list.append() and item
|
||||
# assignment means we don't need a lock.
|
||||
self.result: Optional[JsonSchemaMixin] = None
|
||||
self.result: Optional[dbtClassMixin] = None
|
||||
self.error: Optional[RPCException] = None
|
||||
self.state: TaskHandlerState = TaskHandlerState.NotStarted
|
||||
self.logs: List[LogMessage] = []
|
||||
@@ -388,7 +391,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
         except RPCException as exc:
             # RPC Exceptions come already preserialized for the jsonrpc
             # framework
-            exc.logs = [log.to_dict() for log in self.logs]
+            exc.logs = [log.to_dict(omit_none=True) for log in self.logs]
             exc.tags = self.tags
             raise

@@ -400,6 +403,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
         try:
             with StateHandler(self):
                 self.result = self.get_result()
+
         except (dbt.exceptions.Exception, RPCException):
             # we probably got an error after the RPC call ran (and it was
             # probably deps...). By now anyone who wanted to see it has seen it

@@ -449,6 +453,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
         )

         try:
+            cls.validate(self.task_kwargs)
             return cls.from_dict(self.task_kwargs)
         except ValidationError as exc:
             # raise a TypeError to indicate invalid parameters so we get a nice
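The added cls.validate() call reflects that, with dbtClassMixin, schema validation is an explicit step before from_dict() rather than something from_dict() does implicitly. A condensed sketch of that pattern; build_params is hypothetical and cls stands in for an RPCParameters subclass:

from typing import Any, Dict

from dbt.dataclass_schema import ValidationError


def build_params(cls, task_kwargs: Dict[str, Any]):
    try:
        cls.validate(task_kwargs)          # explicit schema check, new in this diff
        return cls.from_dict(task_kwargs)  # then construct the dataclass
    except ValidationError as exc:
        # surface bad parameters as a TypeError, as the handler above does
        raise TypeError(f'invalid task parameters: {exc}') from exc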
@@ -14,11 +14,11 @@ from dbt.contracts.rpc import (


 class TaskHandlerProtocol(Protocol):
-    started: Optional[datetime]
-    ended: Optional[datetime]
-    state: TaskHandlerState
     task_id: TaskID
-    process: Optional[multiprocessing.Process]
+    state: TaskHandlerState
+    started: Optional[datetime] = None
+    ended: Optional[datetime] = None
+    process: Optional[multiprocessing.Process] = None

     @property
     def request_id(self) -> Union[str, int]:
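Reordering TaskHandlerProtocol so the Optional attributes carry None defaults means an implementer only has to provide task_id and state up front; started, ended, and process can legitimately stay unset until the task runs. A standalone sketch of the same shape; HandlerLike and its simplified attribute types are hypothetical:

import multiprocessing
from datetime import datetime
from typing import Optional

from typing_extensions import Protocol


class HandlerLike(Protocol):
    # required up front
    task_id: int
    state: str
    # may remain unset until the task actually starts
    started: Optional[datetime] = None
    ended: Optional[datetime] = None
    process: Optional[multiprocessing.Process] = None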
@@ -19,7 +19,7 @@ from dbt.contracts.rpc import (
     TaskID,
 )
 from dbt.logger import LogMessage, list_handler
-from dbt.perf_utils import get_full_manifest
+from dbt.parser.manifest import ManifestLoader
 from dbt.rpc.error import dbt_error
 from dbt.rpc.gc import GarbageCollector
 from dbt.rpc.task_handler_protocol import TaskHandlerProtocol, TaskHandlerMap

@@ -187,7 +187,7 @@ class TaskManager:
         return True

     def parse_manifest(self) -> None:
-        self.manifest = get_full_manifest(self.config, reset=True)
+        self.manifest = ManifestLoader.get_full_manifest(self.config, reset=True)

     def set_compile_exception(self, exc, logs=List[LogMessage]) -> None:
         assert self.last_parse.state == ManifestStatus.Compiling, \
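parse_manifest() now goes through the ManifestLoader classmethod instead of the old dbt.perf_utils helper. A minimal sketch of the call, assuming config is an already-loaded runtime config object as in TaskManager (reload_manifest is a hypothetical wrapper):

from dbt.parser.manifest import ManifestLoader


def reload_manifest(config):
    # reset=True matches the parse_manifest() change above
    return ManifestLoader.get_full_manifest(config, reset=True)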
@@ -4,8 +4,7 @@ import re
 from dbt.exceptions import VersionsNotCompatibleException
 import dbt.utils

-from hologram import JsonSchemaMixin
-from hologram.helpers import StrEnum
+from dbt.dataclass_schema import dbtClassMixin, StrEnum
 from typing import Optional


@@ -18,12 +17,12 @@ class Matchers(StrEnum):


 @dataclass
-class VersionSpecification(JsonSchemaMixin):
-    major: Optional[str]
-    minor: Optional[str]
-    patch: Optional[str]
-    prerelease: Optional[str]
-    build: Optional[str]
+class VersionSpecification(dbtClassMixin):
+    major: Optional[str] = None
+    minor: Optional[str] = None
+    patch: Optional[str] = None
+    prerelease: Optional[str] = None
+    build: Optional[str] = None
     matcher: Matchers = Matchers.EXACT


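Because dbtClassMixin builds on plain dataclasses, every Optional field now needs an explicit default, which is why VersionSpecification gains "= None" on each field. A sketch of constructing a partial specification, assuming this hunk comes from dbt's semver module (the import path is an assumption):

from dbt.semver import VersionSpecification

# only the fields of interest are passed; the rest default to None and
# matcher defaults to Matchers.EXACT
spec = VersionSpecification(major='0', minor='19')
print(spec.to_dict(omit_none=True))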
@@ -9,7 +9,7 @@ from dbt import tracking
 from dbt import ui
 from dbt.contracts.graph.manifest import Manifest
 from dbt.contracts.results import (
-    RunModelResult, collect_timing_info
+    NodeStatus, RunResult, collect_timing_info, RunStatus
 )
 from dbt.exceptions import (
     NotImplementedException, CompilationException, RuntimeException,
@@ -165,6 +165,7 @@ class ExecutionContext:
     """During execution and error handling, dbt makes use of mutable state:
     timing information and the newest (compiled vs executed) form of the node.
     """
+
     def __init__(self, node):
         self.timing = []
         self.node = node
@@ -179,20 +180,20 @@ class BaseRunner(metaclass=ABCMeta):
         self.num_nodes = num_nodes

         self.skip = False
-        self.skip_cause: Optional[RunModelResult] = None
+        self.skip_cause: Optional[RunResult] = None

     @abstractmethod
     def compile(self, manifest: Manifest) -> Any:
         pass

     def get_result_status(self, result) -> Dict[str, str]:
-        if result.error:
-            return {'node_status': 'error', 'node_error': str(result.error)}
-        elif result.skip:
+        if result.status == NodeStatus.Error:
+            return {'node_status': 'error', 'node_error': str(result.message)}
+        elif result.status == NodeStatus.Skipped:
             return {'node_status': 'skipped'}
-        elif result.fail:
+        elif result.status == NodeStatus.Fail:
             return {'node_status': 'failed'}
-        elif result.warn:
+        elif result.status == NodeStatus.Warn:
             return {'node_status': 'warn'}
         else:
             return {'node_status': 'passed'}
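get_result_status() now branches on a single NodeStatus enum instead of the old error/skip/fail/warn flags, with the human-readable text carried in result.message. A hypothetical consumer doing the same kind of check (summarize is not part of dbt; result is assumed to be a RunResult-like object with .status and .message):

from dbt.contracts.results import NodeStatus


def summarize(result):
    # branch on the enum rather than on separate boolean flags
    if result.status == NodeStatus.Error:
        return f'error: {result.message}'
    if result.status in (NodeStatus.Skipped, NodeStatus.Fail, NodeStatus.Warn):
        return str(result.status)
    return 'passed'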
@@ -212,52 +213,62 @@ class BaseRunner(metaclass=ABCMeta):

         return result

-    def _build_run_result(self, node, start_time, error, status, timing_info,
-                          skip=False, fail=None, warn=None, agate_table=None):
+    def _build_run_result(self, node, start_time, status, timing_info, message,
+                          agate_table=None, adapter_response=None):
         execution_time = time.time() - start_time
         thread_id = threading.current_thread().name
-        return RunModelResult(
-            node=node,
-            error=error,
-            skip=skip,
-            status=status,
-            fail=fail,
-            warn=warn,
-            execution_time=execution_time,
-            thread_id=thread_id,
-            timing=timing_info,
+        if adapter_response is None:
+            adapter_response = {}
+        return RunResult(
+            status=status,
+            thread_id=thread_id,
+            execution_time=execution_time,
+            timing=timing_info,
+            message=message,
+            node=node,
             agate_table=agate_table,
+            adapter_response=adapter_response
         )

-    def error_result(self, node, error, start_time, timing_info):
+    def error_result(self, node, message, start_time, timing_info):
         return self._build_run_result(
             node=node,
             start_time=start_time,
-            error=error,
-            status='ERROR',
-            timing_info=timing_info
+            status=RunStatus.Error,
+            timing_info=timing_info,
+            message=message,
         )

     def ephemeral_result(self, node, start_time, timing_info):
         return self._build_run_result(
             node=node,
             start_time=start_time,
-            error=None,
-            status=None,
-            timing_info=timing_info
+            status=RunStatus.Success,
+            timing_info=timing_info,
+            message=None
         )

     def from_run_result(self, result, start_time, timing_info):
         return self._build_run_result(
             node=result.node,
             start_time=start_time,
-            error=result.error,
-            skip=result.skip,
             status=result.status,
-            fail=result.fail,
-            warn=result.warn,
             timing_info=timing_info,
+            message=result.message,
             agate_table=result.agate_table,
+            adapter_response=result.adapter_response
         )

+    def skip_result(self, node, message):
+        thread_id = threading.current_thread().name
+        return RunResult(
+            status=RunStatus.Skipped,
+            thread_id=thread_id,
+            execution_time=0,
+            timing=[],
+            message=message,
+            node=node,
+            adapter_response={}
+        )
+
     def compile_and_execute(self, manifest, ctx):
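All of the helpers above now funnel into one RunResult constructor with the same keyword arguments. A sketch of building an error result directly with those keywords; the node, message, and timing values here are placeholders, not real dbt objects:

import threading
import time

from dbt.contracts.results import RunResult, RunStatus

start_time = time.time()
# ... run the node here ...
result = RunResult(
    status=RunStatus.Error,
    thread_id=threading.current_thread().name,
    execution_time=time.time() - start_time,
    timing=[],                 # placeholder timing info
    message='Database Error: relation does not exist',  # placeholder message
    node=None,                 # a real call passes the compiled node
    agate_table=None,
    adapter_response={},
)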
@@ -340,7 +351,7 @@ class BaseRunner(metaclass=ABCMeta):
         # an error
         if (
             exc_str is not None and result is not None and
-            result.error is None and error is None
+            result.status != NodeStatus.Error and error is None
         ):
             error = exc_str

@@ -389,7 +400,7 @@ class BaseRunner(metaclass=ABCMeta):
         schema_name = self.node.schema
         node_name = self.node.name

-        error = None
+        error_message = None
         if not self.node.is_ephemeral_model:
             # if this model was skipped due to an upstream ephemeral model
             # failure, print a special 'error skip' message.

@@ -408,7 +419,7 @@ class BaseRunner(metaclass=ABCMeta):
                     'an ephemeral failure'
                 )
                 # set an error so dbt will exit with an error code
-                error = (
+                error_message = (
                     'Compilation Error in {}, caused by compilation error '
                     'in referenced ephemeral model {}'
                     .format(self.node.unique_id,

@@ -423,7 +434,7 @@ class BaseRunner(metaclass=ABCMeta):
                 self.num_nodes
             )

-        node_result = RunModelResult(self.node, skip=True, error=error)
+        node_result = self.skip_result(self.node, error_message)
         return node_result

     def do_skip(self, cause=None):
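The skip path keeps its old behavior of forcing a non-zero exit when the skip was caused by an upstream ephemeral compilation failure, but the message now travels through skip_result() instead of a RunModelResult flag. A condensed, hypothetical rendering of that decision (build_skip_result and the boolean flag are illustrative only):

def build_skip_result(runner, caused_by_ephemeral_failure: bool):
    error_message = None
    if caused_by_ephemeral_failure:
        # setting a message here is what later drives the non-zero exit code
        error_message = (
            f'Compilation Error in {runner.node.unique_id}, caused by a '
            'compilation error in a referenced ephemeral model'
        )
    return runner.skip_result(runner.node, error_message)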
Some files were not shown because too many files have changed in this diff.