Mirror of https://github.com/dbt-labs/dbt-core, synced 2025-12-17 19:31:34 +00:00
Compare commits
881 Commits
The commit table of the compare view listed 881 commits, but only bare SHA-1 values survived extraction (2b47cb4af3 through 738304f438); the Author, Date, and commit-message columns were not captured in this mirror snapshot.

.bumpversion.cfg

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.12.0
current_version = 0.14.4b1
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)

@@ -22,5 +22,15 @@ first_value = 1

[bumpversion:file:setup.py]

[bumpversion:file:dbt/version.py]
[bumpversion:file:core/setup.py]

[bumpversion:file:core/dbt/version.py]

[bumpversion:file:plugins/postgres/setup.py]

[bumpversion:file:plugins/redshift/setup.py]

[bumpversion:file:plugins/snowflake/setup.py]

[bumpversion:file:plugins/bigquery/setup.py]

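The `parse` value above is a Python regular expression with named groups that bumpversion uses to split the current version into parts. A minimal sketch of how the portion visible in this hunk behaves (the prerelease handling of the real pattern falls outside the displayed lines, so the `b1` suffix is simply left unmatched here):

```python
import re

# Only the X.Y.Z core of the parse expression is visible in the hunk above;
# the prerelease suffix handling lies outside the displayed lines.
parse = re.compile(
    r"(?P<major>\d+)"
    r"\.(?P<minor>\d+)"
    r"\.(?P<patch>\d+)"
)

match = parse.match("0.14.4b1")
print(match.groupdict())  # {'major': '0', 'minor': '14', 'patch': '4'}
```
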
.circleci/config.yml

@@ -1,114 +1,102 @@
version: 2
jobs:
unit:
docker: &py36
- image: python:3.6
docker: &test_and_postgres
- image: fishtownjacob/test-container
- image: postgres
name: database
environment: &pgenv
POSTGRES_USER: "root"
POSTGRES_PASSWORD: "password"
POSTGRES_DB: "dbt"
steps:
- checkout
- run: apt-get update && apt-get install -y python-dev python3-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run: &setupdb
name: Setup postgres
command: bash test/setup_db.sh
environment:
PGHOST: 127.0.0.1
PGHOST: database
PGUSER: root
PGPASSWORD: password
PGDATABASE: postgres
- run: tox -e pep8,unit-py27,unit-py36
- run: tox -e flake8,unit-py27,unit-py36
integration-postgres-py36:
docker: *py36
docker: *test_and_postgres
steps:
- checkout
- run: apt-get update && apt-get install -y python3-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py36
- store_artifacts:
path: ./logs
integration-snowflake-py36:
docker: *py36
docker: &test_only
- image: fishtownjacob/test-container
steps:
- checkout
- run: apt-get update && apt-get install -y python3-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-snowflake-py36
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py36:
docker: *py36
docker: *test_only
steps:
- checkout
- run: apt-get update && apt-get install -y python3-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-redshift-py36
- store_artifacts:
path: ./logs
integration-bigquery-py36:
docker: *py36
docker: *test_only
steps:
- checkout
- run: apt-get update && apt-get install -y python3-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-bigquery-py36
- store_artifacts:
path: ./logs
integration-postgres-py27:
docker: &py27
- image: python:2.7
- image: postgres
environment: *pgenv
docker: *test_and_postgres
steps:
- checkout
- run: apt-get update && apt-get install -y python-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py27
- store_artifacts:
path: ./logs
integration-snowflake-py27:
docker: *py27
docker: *test_only
steps:
- checkout
- run: apt-get update && apt-get install -y python-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-snowflake-py27
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py27:
docker: *py27
docker: *test_only
steps:
- checkout
- run: apt-get update && apt-get install -y python-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-redshift-py27
- store_artifacts:
path: ./logs
integration-bigquery-py27:
docker: *py27
docker: *test_only
steps:
- checkout
- run: apt-get update && apt-get install -y python-dev postgresql
- run: echo 127.0.0.1 database | tee -a /etc/hosts
- run: pip install virtualenvwrapper tox
- run:
name: Run tests
command: tox -e integration-bigquery-py27
- store_artifacts:
path: ./logs

workflows:
version: 2

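Both the old and the new job definitions above rely on YAML anchors (`&py36`, `&test_and_postgres`, `&test_only`, `&pgenv`, `&setupdb`) and aliases (`*py36`, `*test_only`, `*setupdb`) so that a single docker executor or setup step can be reused across the integration jobs. A minimal sketch of that mechanism, using PyYAML (assumed installed for this illustration) and job names taken from the config above:

```python
import yaml  # PyYAML, assumed available for this illustration

snippet = """
jobs:
  integration-snowflake-py36:
    docker: &test_only
      - image: fishtownjacob/test-container
  integration-redshift-py36:
    docker: *test_only
"""

jobs = yaml.safe_load(snippet)["jobs"]

# The alias resolves to the node defined at the anchor, so both jobs
# end up with an identical docker executor definition.
assert jobs["integration-redshift-py36"]["docker"] == jobs["integration-snowflake-py36"]["docker"]
print(jobs["integration-redshift-py36"]["docker"])
# [{'image': 'fishtownjacob/test-container'}]
```
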
.coveragerc (deleted)

@@ -1,3 +0,0 @@
[report]
include =
dbt/*

.dockerignore (new file, 1 line)

@@ -0,0 +1 @@
*

.github/ISSUE_TEMPLATE.md (deleted, 34 lines)

@@ -1,34 +0,0 @@
Please make sure to fill out either the issue template or the feature template and delete the other one!

## Issue

### Issue description
In general terms, please describe the issue. What command did you run?

### Results
What happened? What did you expect to happen?

### System information
The output of `dbt --version`:
```
<output goes here>
```

The operating system you're running on:

The python version you're using (probably the output of `python --version`)

### Steps to reproduce
In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example models, etc are all very helpful here.


## Feature

### Feature description
Please describe the feature you would like dbt to have. Please provide any details, relevant documentation links, StackOverflow links, etc here.

### Who will this benefit?
What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.

.github/ISSUE_TEMPLATE/bug_report.md (new file, 41 lines)

@@ -0,0 +1,41 @@
---
name: Bug report
about: Report a bug or an issue you've found with dbt
title: ''
labels: bug, triage
assignees: ''

---

### Describe the bug
A clear and concise description of what the bug is. What command did you run? What happened?

### Steps To Reproduce
In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here.

### Expected behavior
A clear and concise description of what you expected to happen.

### Screenshots and log output
If applicable, add screenshots or log output to help explain your problem.

### System information
**Which database are you using dbt with?**
- [ ] postgres
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] other (specify: ____________)

**The output of `dbt --version`:**
```
<output goes here>
```

**The operating system you're using:**

**The output of `python --version`:**

### Additional context
Add any other context about the problem here.

.github/ISSUE_TEMPLATE/feature_request.md (new file, 20 lines)

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for dbt
title: ''
labels: enhancement, triage
assignees: ''

---

### Describe the feature
A clear and concise description of what you want to happen.

### Describe alternatives you've considered
A clear and concise description of any alternative solutions or features you've considered.

### Additional context
Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here.

### Who will this benefit?
What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.

323
CHANGELOG.md
323
CHANGELOG.md
@@ -1,3 +1,314 @@
|
||||
## dbt 0.14.3 (October 10, 2019)
|
||||
|
||||
This is a bugfix release.
|
||||
|
||||
### Fixes:
|
||||
- Fix for `dictionary changed size during iteration` race condition ([#1740](https://github.com/fishtown-analytics/dbt/issues/1740), [#1750](https://github.com/fishtown-analytics/dbt/pull/1750))
|
||||
- Fix upper bound on jsonschema dependency to 3.1.1 ([#1817](https://github.com/fishtown-analytics/dbt/issues/1817), [#1819](https://github.com/fishtown-analytics/dbt/pull/1819))
|
||||
|
||||
### Under the hood:
|
||||
- Provide a programmatic method for validating profile targets ([#1754](https://github.com/fishtown-analytics/dbt/issues/1754), [#1775](https://github.com/fishtown-analytics/dbt/pull/1775))
|
||||
|
||||
|
||||
## dbt 0.14.2 (September 13, 2019)
|
||||
|
||||
### Overview
|
||||
|
||||
This is a bugfix release.
|
||||
|
||||
### Fixes:
|
||||
- Fix for dbt hanging at the end of execution in `dbt source snapshot-freshness` tasks ([#1728](https://github.com/fishtown-analytics/dbt/issues/1728), [#1729](https://github.com/fishtown-analytics/dbt/pull/1729))
|
||||
- Fix for broken "packages" and "tags" selector dropdowns in the dbt Documentation website ([docs#47](https://github.com/fishtown-analytics/dbt-docs/issues/47), [#1726](https://github.com/fishtown-analytics/dbt/pull/1726))
|
||||
|
||||
|
||||
## dbt 0.14.1 (September 3, 2019)
|
||||
|
||||
### Overview
|
||||
|
||||
This is primarily a bugfix release which contains a few minor improvements too. Note: this release includes an important change in how the `check` snapshot strategy works. See [#1614](https://github.com/fishtown-analytics/dbt/pull/1614) for more information. If you are using snapshots with the `check` strategy on dbt v0.14.0, it is strongly recommended that you upgrade to 0.14.1 at your soonest convenience.
|
||||
|
||||
### Breaking changes
|
||||
- The undocumented `macros` attribute was removed from the `graph` context variable ([#1615](https://github.com/fishtown-analytics/dbt/pull/1615))
|
||||
|
||||
### Features:
|
||||
- Summarize warnings at the end of dbt runs ([#1597](https://github.com/fishtown-analytics/dbt/issues/1597), [#1654](https://github.com/fishtown-analytics/dbt/pull/1654))
|
||||
- Speed up catalog generation on postgres by using avoiding use of the `information_schema` ([#1540](https://github.com/fishtown-analytics/dbt/pull/1540))
|
||||
- Docs site updates ([#1621](https://github.com/fishtown-analytics/dbt/issues/1621))
|
||||
- Fix for incorrect node selection logic in DAG view ([docs#38](https://github.com/fishtown-analytics/dbt-docs/pull/38))
|
||||
- Update page title, meta tags, and favicon ([docs#39](https://github.com/fishtown-analytics/dbt-docs/pull/39))
|
||||
- Bump the version of `dbt-styleguide`, changing file tree colors from orange to black :)
|
||||
- Add environment variables for macro debugging flags ([#1628](https://github.com/fishtown-analytics/dbt/issues/1628), [#1629](https://github.com/fishtown-analytics/dbt/pull/1629))
|
||||
- Speed up node selection by making it linear, rather than quadratic, in complexity ([#1611](https://github.com/fishtown-analytics/dbt/issues/1611), [#1615](https://github.com/fishtown-analytics/dbt/pull/1615))
|
||||
- Specify the `application` field in Snowflake connections ([#1622](https://github.com/fishtown-analytics/dbt/issues/1622), [#1623](https://github.com/fishtown-analytics/dbt/pull/1623))
|
||||
- Add support for clustering on Snowflake ([#634](https://github.com/fishtown-analytics/dbt/issues/634), [#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689)) ([docs](https://docs.getdbt.com/docs/snowflake-configs#section-configuring-table-clustering))
|
||||
- Add support for job priority on BigQuery ([#1456](https://github.com/fishtown-analytics/dbt/issues/1456), [#1673](https://github.com/fishtown-analytics/dbt/pull/1673)) ([docs](https://docs.getdbt.com/docs/profile-bigquery#section-priority))
|
||||
- Add `node.config` and `node.tags` to the `generate_schema_name` and `generate_alias_name` macro context ([#1700](https://github.com/fishtown-analytics/dbt/issues/1700), [#1701](https://github.com/fishtown-analytics/dbt/pull/1701))
|
||||
|
||||
### Fixes:
|
||||
- Fix for reused `check_cols` values in snapshots ([#1614](https://github.com/fishtown-analytics/dbt/pull/1614), [#1709](https://github.com/fishtown-analytics/dbt/pull/1709))
|
||||
- Fix for rendering column descriptions in sources ([#1619](https://github.com/fishtown-analytics/dbt/issues/1619), [#1633](https://github.com/fishtown-analytics/dbt/pull/1633))
|
||||
- Fix for `is_incremental()` returning True for models that are not materialized as incremental models ([#1249](https://github.com/fishtown-analytics/dbt/issues/1249), [#1608](https://github.com/fishtown-analytics/dbt/pull/1608))
|
||||
- Fix for serialization of BigQuery results which contain nested or repeated records ([#1626](https://github.com/fishtown-analytics/dbt/issues/1626), [#1638](https://github.com/fishtown-analytics/dbt/pull/1638))
|
||||
- Fix for loading seed files which contain non-ascii characters ([#1632](https://github.com/fishtown-analytics/dbt/issues/1632), [#1644](https://github.com/fishtown-analytics/dbt/pull/1644))
|
||||
- Fix for creation of user cookies in incorrect directories when `--profile-dir` or `$DBT_PROFILES_DIR` is provided ([#1645](https://github.com/fishtown-analytics/dbt/issues/1645), [#1656](https://github.com/fishtown-analytics/dbt/pull/1656))
|
||||
- Fix for error handling when transactions are being rolled back ([#1647](https://github.com/fishtown-analytics/dbt/pull/1647))
|
||||
- Fix for incorrect references to `dbt.exceptions` in jinja code ([#1569](https://github.com/fishtown-analytics/dbt/issues/1569), [#1609](https://github.com/fishtown-analytics/dbt/pull/1609))
|
||||
- Fix for duplicated schema creation due to case-sensitive comparison ([#1651](https://github.com/fishtown-analytics/dbt/issues/1651), [#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
|
||||
- Fix for "schema stub" created automatically by dbt ([#913](https://github.com/fishtown-analytics/dbt/issues/913), [#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
|
||||
- Fix for incremental merge query on old versions of postgres (<=9.6) ([#1665](https://github.com/fishtown-analytics/dbt/issues/1665), [#1666](https://github.com/fishtown-analytics/dbt/pull/1666))
|
||||
- Fix for serializing results of queries which return `TIMESTAMP_TZ` columns on Snowflake in the RPC server ([#1670](https://github.com/fishtown-analytics/dbt/pull/1670))
|
||||
- Fix typo in InternalException ([#1640](https://github.com/fishtown-analytics/dbt/issues/1640), [#1672](https://github.com/fishtown-analytics/dbt/pull/1672))
|
||||
- Fix typo in CLI help for snapshot migration subcommand ([#1664](https://github.com/fishtown-analytics/dbt/pull/1664))
|
||||
- Fix for error handling logic when empty queries are submitted on Snowflake ([#1693](https://github.com/fishtown-analytics/dbt/issues/1693), [#1694](https://github.com/fishtown-analytics/dbt/pull/1694))
|
||||
- Fix for non-atomic column expansion logic in Snowflake incremental models and snapshots ([#1687](https://github.com/fishtown-analytics/dbt/issues/1687), [#1690](https://github.com/fishtown-analytics/dbt/pull/1690))
|
||||
- Fix for unprojected `count(*)` expression injected by custom data tests ([#1688](https://github.com/fishtown-analytics/dbt/pull/1688))
|
||||
- Fix for `dbt run` and `dbt docs generate` commands when running against Panoply Redshift ([#1479](https://github.com/fishtown-analytics/dbt/issues/1479), [#1686](https://github.com/fishtown-analytics/dbt/pull/1686))
|
||||
|
||||
|
||||
### Contributors:
|
||||
Thanks for your contributions to dbt!
|
||||
|
||||
- [@levimalott](https://github.com/levimalott) ([#1647](https://github.com/fishtown-analytics/dbt/pull/1647))
|
||||
- [@aminamos](https://github.com/aminamos) ([#1609](https://github.com/fishtown-analytics/dbt/pull/1609))
|
||||
- [@elexisvenator](https://github.com/elexisvenator) ([#1540](https://github.com/fishtown-analytics/dbt/pull/1540))
|
||||
- [@edmundyan](https://github.com/edmundyan) ([#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
|
||||
- [@vitorbaptista](https://github.com/vitorbaptista) ([#1664](https://github.com/fishtown-analytics/dbt/pull/1664))
|
||||
- [@sjwhitworth](https://github.com/sjwhitworth) ([#1672](https://github.com/fishtown-analytics/dbt/pull/1672), [#1673](https://github.com/fishtown-analytics/dbt/pull/1673))
|
||||
- [@mikaelene](https://github.com/mikaelene) ([#1688](https://github.com/fishtown-analytics/dbt/pull/1688), [#1709](https://github.com/fishtown-analytics/dbt/pull/1709))
|
||||
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689))
|
||||
|
||||
|
||||
## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)
|
||||
|
||||
### Overview
|
||||
- Replace Archives with Snapshots ([docs](https://docs.getdbt.com/v0.14/docs/snapshots), [migration guide](https://docs.getdbt.com/v0.14/docs/upgrading-to-014))
|
||||
- Add three new top-level commands:
|
||||
- `dbt ls` ([docs](https://docs.getdbt.com/v0.14/docs/list))
|
||||
- `dbt run-operation` ([docs](https://docs.getdbt.com/v0.14/docs/run-operation))
|
||||
- `dbt rpc` ([docs](https://docs.getdbt.com/v0.14/docs/rpc))
|
||||
- Support the specification of severity levels for schema and data tests ([docs](https://docs.getdbt.com/v0.14/docs/testing#section-test-severity))
|
||||
- Many new quality of life improvements and bugfixes
|
||||
|
||||
### Breaking changes
|
||||
- Stub out adapter methods at parse-time to speed up parsing ([#1413](https://github.com/fishtown-analytics/dbt/pull/1413))
|
||||
- Removed support for the `--non-destructive` flag ([#1419](https://github.com/fishtown-analytics/dbt/pull/1419), [#1415](https://github.com/fishtown-analytics/dbt/issues/1415))
|
||||
- Removed support for the `sql_where` config to incremental models ([#1408](https://github.com/fishtown-analytics/dbt/pull/1408), [#1351](https://github.com/fishtown-analytics/dbt/issues/1351))
|
||||
- Changed `expand_target_column_types` to take a Relation instead of a string ([#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
|
||||
- Replaced Archives with Snapshots
|
||||
- Normalized meta-column names in Snapshot tables ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#251](https://github.com/fishtown-analytics/dbt/issues/251))
|
||||
|
||||
### Features
|
||||
- Add `run-operation` command which invokes macros directly from the CLI ([#1328](https://github.com/fishtown-analytics/dbt/pull/1328)) ([docs](https://docs.getdbt.com/v0.14/docs/run-operation))
|
||||
- Add a `dbt ls` command which lists resources in your project ([#1436](https://github.com/fishtown-analytics/dbt/pull/1436), [#467](https://github.com/fishtown-analytics/dbt/issues/467)) ([docs](https://docs.getdbt.com/v0.14/docs/list))
|
||||
- Add Snapshots, an improvement over Archives ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1175](https://github.com/fishtown-analytics/dbt/issues/1175)) ([docs](https://docs.getdbt.com/v0.14/docs/snapshots))
|
||||
- Add the 'check' snapshot strategy ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#706](https://github.com/fishtown-analytics/dbt/issues/706))
|
||||
- Support Snapshots across logical databases ([#1455](https://github.com/fishtown-analytics/dbt/issues/1455))
|
||||
- Implement Snapshots using a merge statement where supported ([#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
|
||||
- Support Snapshot selection using `--select` ([#1520](https://github.com/fishtown-analytics/dbt/pull/1520), [#1512](https://github.com/fishtown-analytics/dbt/issues/1512))
|
||||
- Add an RPC server via `dbt rpc` ([#1301](https://github.com/fishtown-analytics/dbt/pull/1301), [#1274](https://github.com/fishtown-analytics/dbt/issues/1274)) ([docs](https://docs.getdbt.com/v0.14/docs/rpc))
|
||||
- Add `ps` and `kill` commands to the rpc server ([#1380](https://github.com/fishtown-analytics/dbt/pull/1380/), [#1369](https://github.com/fishtown-analytics/dbt/issues/1369), [#1370](https://github.com/fishtown-analytics/dbt/issues/1370))
|
||||
- Add support for ephemeral nodes to the rpc server ([#1373](https://github.com/fishtown-analytics/dbt/pull/1373), [#1368](https://github.com/fishtown-analytics/dbt/issues/1368))
|
||||
- Add support for inline macros to the rpc server ([#1375](https://github.com/fishtown-analytics/dbt/pull/1375), [#1372](https://github.com/fishtown-analytics/dbt/issues/1372), [#1348](https://github.com/fishtown-analytics/dbt/pull/1348))
|
||||
- Improve error handling in the rpc server ([#1341](https://github.com/fishtown-analytics/dbt/pull/1341), [#1309](https://github.com/fishtown-analytics/dbt/issues/1309), [#1310](https://github.com/fishtown-analytics/dbt/issues/1310))
|
||||
- Made printer width configurable ([#1026](https://github.com/fishtown-analytics/dbt/issues/1026), [#1247](https://github.com/fishtown-analytics/dbt/pull/1247)) ([docs](https://docs.getdbt.com/v0.14/docs/configure-your-profile#section-additional-profile-configurations))
|
||||
- Retry package downloads from the hub.getdbt.com ([#1451](https://github.com/fishtown-analytics/dbt/issues/1451), [#1491](https://github.com/fishtown-analytics/dbt/pull/1491))
|
||||
- Add a test "severity" level, presented as a keyword argument to schema tests ([#1410](https://github.com/fishtown-analytics/dbt/pull/1410), [#1005](https://github.com/fishtown-analytics/dbt/issues/1005)) ([docs](https://docs.getdbt.com/v0.14/docs/testing#section-test-severity))
|
||||
- Add a `generate_alias_name` macro to configure alias names dynamically ([#1363](https://github.com/fishtown-analytics/dbt/pull/1363)) ([docs](https://docs.getdbt.com/v0.14/docs/using-custom-aliases#section-generate_alias_name))
|
||||
- Add a `node` argument to `generate_schema_name` to configure schema names dynamically ([#1483](https://github.com/fishtown-analytics/dbt/pull/1483), [#1463](https://github.com/fishtown-analytics/dbt/issues/1463)) ([docs](https://docs.getdbt.com/v0.14/docs/using-custom-schemas#section-generate_schema_name-arguments))
|
||||
- Use `create or replace` on Snowflake to rebuild tables and views atomically ([#1101](https://github.com/fishtown-analytics/dbt/issues/1101), [#1409](https://github.com/fishtown-analytics/dbt/pull/1409))
|
||||
- Use `merge` statement for incremental models on Snowflake ([#1414](https://github.com/fishtown-analytics/dbt/issues/1414), [#1307](https://github.com/fishtown-analytics/dbt/pull/1307), [#1409](https://github.com/fishtown-analytics/dbt/pull/1409)) ([docs](https://docs.getdbt.com/v0.14/docs/snowflake-configs#section-merge-behavior-incremental-models-))
|
||||
- Add support seed CSV files that start with a UTF-8 Byte Order Mark (BOM) ([#1452](https://github.com/fishtown-analytics/dbt/pull/1452), [#1177](https://github.com/fishtown-analytics/dbt/issues/1177))
|
||||
- Add a warning when git packages are not pinned to a version ([#1453](https://github.com/fishtown-analytics/dbt/pull/1453), [#1446](https://github.com/fishtown-analytics/dbt/issues/1446))
|
||||
- Add logging for `on-run-start` and `on-run-end hooks` to console output ([#1440](https://github.com/fishtown-analytics/dbt/pull/1440), [#696](https://github.com/fishtown-analytics/dbt/issues/696))
|
||||
- Add modules and tracking information to the rendering context for configuration files ([#1441](https://github.com/fishtown-analytics/dbt/pull/1441), [#1320](https://github.com/fishtown-analytics/dbt/issues/1320))
|
||||
- Add support for `null` vars, and distinguish `null` vars from unset vars ([#1426](https://github.com/fishtown-analytics/dbt/pull/1426), [#608](https://github.com/fishtown-analytics/dbt/issues/608))
|
||||
- Add support for the `search_path` configuration in Postgres/Redshift profiles ([#1477](https://github.com/fishtown-analytics/dbt/issues/1477), [#1476](https://github.com/fishtown-analytics/dbt/pull/1476)) ([docs (postgres)](https://docs.getdbt.com/v0.14/docs/profile-postgres), [docs (redshift)](https://docs.getdbt.com/v0.14/docs/profile-redshift))
|
||||
- Add support for persisting documentation as `descriptions` for tables and views on BigQuery ([#1031](https://github.com/fishtown-analytics/dbt/issues/1031), [#1285](https://github.com/fishtown-analytics/dbt/pull/1285)) ([docs](https://docs.getdbt.com/v0.14/docs/bigquery-configs#section-persisting-model-descriptions))
|
||||
- Add a `--project-dir` path which will invoke dbt in the specified directory ([#1549](https://github.com/fishtown-analytics/dbt/pull/1549), [#1544](https://github.com/fishtown-analytics/dbt/issues/1544))
|
||||
|
||||
### dbt docs Changes
|
||||
- Add searching by tag name ([#32](https://github.com/fishtown-analytics/dbt-docs/pull/32))
|
||||
- Add context menu link to export graph viz as a PNG ([#34](https://github.com/fishtown-analytics/dbt-docs/pull/34))
|
||||
- Fix for clicking models in left-nav while search results are open ([#31](https://github.com/fishtown-analytics/dbt-docs/pull/31))
|
||||
|
||||
### Fixes
|
||||
- Fix for unduly long timeouts when anonymous event tracking is blocked ([#1445](https://github.com/fishtown-analytics/dbt/pull/1445), [#1063](https://github.com/fishtown-analytics/dbt/issues/1063))
|
||||
- Fix for error with mostly-duplicate git urls in packages, picking the one that came first. ([#1428](https://github.com/fishtown-analytics/dbt/pull/1428), [#1084](https://github.com/fishtown-analytics/dbt/issues/1084))
|
||||
- Fix for unrendered `description` field as jinja in top-level Source specification ([#1484](https://github.com/fishtown-analytics/dbt/issues/1484), [#1494](https://github.com/fishtown-analytics/dbt/issues/1494))
|
||||
- Fix for API error when very large temp tables are created in BigQuery ([#1423](https://github.com/fishtown-analytics/dbt/issues/1423), [#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
|
||||
- Fix for compiler errors that occurred if jinja code was present outside of a docs blocks in .md files ([#1513](https://github.com/fishtown-analytics/dbt/pull/1513), [#988](https://github.com/fishtown-analytics/dbt/issues/988))
|
||||
- Fix `TEXT` handling on postgres and redshift ([#1420](https://github.com/fishtown-analytics/dbt/pull/1420), [#781](https://github.com/fishtown-analytics/dbt/issues/781))
|
||||
- Fix for compiler error when vars are undefined but only used in disabled models ([#1429](https://github.com/fishtown-analytics/dbt/pull/1429), [#434](https://github.com/fishtown-analytics/dbt/issues/434))
|
||||
- Improved the error message when iterating over the results of a macro that doesn't exist ([#1425](https://github.com/fishtown-analytics/dbt/pull/1425), [#1424](https://github.com/fishtown-analytics/dbt/issues/1424))
|
||||
- Improved the error message when tests have invalid parameter definitions ([#1427](https://github.com/fishtown-analytics/dbt/pull/1427), [#1325](https://github.com/fishtown-analytics/dbt/issues/1325))
|
||||
- Improved the error message when a user tries to archive a non-existent table ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1066](https://github.com/fishtown-analytics/dbt/issues/1066))
|
||||
- Fix for archive logic which tried to create already-existing destination schemas ([#1398](https://github.com/fishtown-analytics/dbt/pull/1398), [#758](https://github.com/fishtown-analytics/dbt/issues/758))
|
||||
- Fix for incorrect error codes when Operations exit with an error ([#1406](https://github.com/fishtown-analytics/dbt/pull/1406), [#1377](https://github.com/fishtown-analytics/dbt/issues/1377))
|
||||
- Fix for missing compiled SQL when the rpc server encounters a database error ([#1381](https://github.com/fishtown-analytics/dbt/pull/1381), [#1371](https://github.com/fishtown-analytics/dbt/issues/1371))
|
||||
- Fix for broken link in the profile.yml generated by `dbt init` ([#1366](https://github.com/fishtown-analytics/dbt/pull/1366), [#1344](https://github.com/fishtown-analytics/dbt/issues/1344))
|
||||
- Fix the sample test.env file's redshift password field ([#1364](https://github.com/fishtown-analytics/dbt/pull/1364))
|
||||
- Fix collisions on models running concurrently that have duplicate names but have distinguishing aliases ([#1342](https://github.com/fishtown-analytics/dbt/pull/1342), [#1321](https://github.com/fishtown-analytics/dbt/issues/1321))
|
||||
- Fix for a bad error message when a `version` is missing from a package spec in `packages.yml` ([#1551](https://github.com/fishtown-analytics/dbt/pull/1551), [#1546](https://github.com/fishtown-analytics/dbt/issues/1546))
|
||||
- Fix for wrong package scope when the two-arg method of `ref` is used ([#1515](https://github.com/fishtown-analytics/dbt/pull/1515), [#1504](https://github.com/fishtown-analytics/dbt/issues/1504))
|
||||
- Fix missing import in test suite ([#1572](https://github.com/fishtown-analytics/dbt/pull/1572))
|
||||
- Fix for a Snowflake error when an external table exists in a schema that dbt operates on ([#1571](https://github.com/fishtown-analytics/dbt/pull/1571), [#1505](https://github.com/fishtown-analytics/dbt/issues/1505))
|
||||
|
||||
|
||||
### Under the hood
|
||||
- Use pytest for tests ([#1417](https://github.com/fishtown-analytics/dbt/pull/1417))
|
||||
- Use flake8 for linting ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1333](https://github.com/fishtown-analytics/dbt/issues/1333))
|
||||
- Added a flag for wrapping models and tests in jinja blocks ([#1407](https://github.com/fishtown-analytics/dbt/pull/1407), [#1400](https://github.com/fishtown-analytics/dbt/issues/1400))
|
||||
- Connection management: Bind connections threads rather than to names ([#1336](https://github.com/fishtown-analytics/dbt/pull/1336), [#1312](https://github.com/fishtown-analytics/dbt/issues/1312))
|
||||
- Add deprecation warning for dbt users on Python2 ([#1534](https://github.com/fishtown-analytics/dbt/pull/1534), [#1531](https://github.com/fishtown-analytics/dbt/issues/1531))
|
||||
- Upgrade networkx to v2.x ([#1509](https://github.com/fishtown-analytics/dbt/pull/1509), [#1496](https://github.com/fishtown-analytics/dbt/issues/1496))
|
||||
- Anonymously track adapter type and rpc requests when tracking is enabled ([#1575](https://github.com/fishtown-analytics/dbt/pull/1575), [#1574](https://github.com/fishtown-analytics/dbt/issues/1574))
|
||||
- Fix for test warnings and general test suite cleanup ([#1578](https://github.com/fishtown-analytics/dbt/pull/1578))
|
||||
|
||||
### Contributors:
|
||||
Over a dozen contributors wrote code for this release of dbt! Thanks for taking the time, and nice work y'all! :)
|
||||
|
||||
- [@nydnarb](https://github.com/nydnarb) ([#1363](https://github.com/fishtown-analytics/dbt/issues/1363))
|
||||
- [@emilieschario](https://github.com/emilieschario) ([#1366](https://github.com/fishtown-analytics/dbt/pull/1366))
|
||||
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1409](https://github.com/fishtown-analytics/dbt/pull/1409))
|
||||
- [@kasanchez](https://github.com/kasanchez) ([#1247](https://github.com/fishtown-analytics/dbt/pull/1247))
|
||||
- [@Blakewell](https://github.com/Blakewell) ([#1307](https://github.com/fishtown-analytics/dbt/pull/1307))
|
||||
- [@buremba](https://github.com/buremba) ([#1476](https://github.com/fishtown-analytics/dbt/pull/1476))
|
||||
- [@darrenhaken](https://github.com/darrenhaken) ([#1285](https://github.com/fishtown-analytics/dbt/pull/1285))
|
||||
- [@tbescherer](https://github.com/tbescherer) ([#1504](https://github.com/fishtown-analytics/dbt/issues/1504))
|
||||
- [@heisencoder](https://github.com/heisencoder) ([#1509](https://github.com/fishtown-analytics/dbt/pull/1509), [#1549](https://github.com/fishtown-analytics/dbt/pull/1549). [#1578](https://github.com/fishtown-analytics/dbt/pull/1578))
|
||||
- [@cclauss](https://github.com/cclauss) ([#1572](https://github.com/fishtown-analytics/dbt/pull/1572))
|
||||
- [@josegalarza](https://github.com/josegalarza) ([#1571](https://github.com/fishtown-analytics/dbt/pull/1571))
|
||||
- [@rmgpinto](https://github.com/rmgpinto) ([docs#31](https://github.com/fishtown-analytics/dbt-docs/pull/31), [docs#32](https://github.com/fishtown-analytics/dbt-docs/pull/32))
|
||||
- [@groodt](https://github.com/groodt) ([docs#34](https://github.com/fishtown-analytics/dbt-docs/pull/34))
|
||||
|
||||
|
||||
## dbt 0.13.1 (May 13, 2019)
|
||||
|
||||
### Overview
|
||||
This is a bugfix release.
|
||||
|
||||
### Bugfixes
|
||||
- Add "MaterializedView" relation type to the Snowflake adapter ([#1430](https://github.com/fishtown-analytics/dbt/issues/1430), [#1432](https://github.com/fishtown-analytics/dbt/pull/1432)) ([@adriank-convoy](https://github.com/adriank-convoy))
|
||||
- Quote databases properly ([#1396](https://github.com/fishtown-analytics/dbt/issues/1396), [#1402](https://github.com/fishtown-analytics/dbt/pull/1402))
|
||||
- Use "ilike" instead of "=" for database equality when listing schemas ([#1411](https://github.com/fishtown-analytics/dbt/issues/1411), [#1412](https://github.com/fishtown-analytics/dbt/pull/1412))
|
||||
- Pass the model name along in get_relations ([#1384](https://github.com/fishtown-analytics/dbt/issues/1384), [#1388](https://github.com/fishtown-analytics/dbt/pull/1388))
|
||||
- Add logging to dbt clean ([#1261](https://github.com/fishtown-analytics/dbt/issues/1261), [#1383](https://github.com/fishtown-analytics/dbt/pull/1383), [#1391](https://github.com/fishtown-analytics/dbt/pull/1391)) ([@emilieschario](https://github.com/emilieschario))
|
||||
|
||||
### dbt Docs
|
||||
- Search by columns ([dbt-docs#23](https://github.com/fishtown-analytics/dbt-docs/pull/23)) ([rmgpinto](https://github.com/rmgpinto))
|
||||
- Support @ selector ([dbt-docs#27](https://github.com/fishtown-analytics/dbt-docs/pull/27))
|
||||
- Fix number formatting on Snowflake and BQ in table stats ([dbt-docs#28](https://github.com/fishtown-analytics/dbt-docs/pull/28))
|
||||
|
||||
### Contributors:
|
||||
Thanks for your contributions to dbt!
|
||||
|
||||
- [@emilieschario](https://github.com/emilieschario)
|
||||
- [@adriank-convoy](https://github.com/adriank-convoy)
|
||||
- [@rmgpinto](https://github.com/rmgpinto)
|
||||
|
||||
|
||||
## dbt 0.13.0 - Stephen Girard (March 21, 2019)

### Overview

This release provides [a stable API for building new adapters](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter) and reimplements dbt's adapters as "plugins". Additionally, a new adapter for [Presto](https://github.com/fishtown-analytics/dbt-presto) was added using this architecture. Beyond adapters, this release of dbt also includes [Sources](https://docs.getdbt.com/v0.13/docs/using-sources), which can be used to document and test source data tables. See the full list of features added in 0.13.0 below.

### Breaking Changes
- Version 1 `schema.yml` specs are no longer implemented. Please use the version 2 spec instead ([migration guide](https://docs.getdbt.com/docs/upgrading-from-0-10-to-0-11#section-schema-yml-v2-syntax))
- `{{this}}` is no longer implemented for `on-run-start` and `on-run-end` hooks. Use `{{ target }}` or an [`on-run-end` context variable](https://docs.getdbt.com/docs/on-run-end-context#section-schemas) instead ([#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878))
- A number of materialization-specific adapter methods have changed in breaking ways. If you use these adapter methods in your macros or materializations, you may need to update your code accordingly.
  - query_for_existing - **removed**, use [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) instead.
  - [get_missing_columns](https://docs.getdbt.com/v0.13/reference#adapter-get-missing-columns) - changed to take `Relation`s instead of schemas and identifiers
  - [expand_target_column_types](https://docs.getdbt.com/v0.13/reference#adapter-expand-target-column-types) - changed to take a `Relation` instead of schema, identifier
  - [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) - added a `database` argument
  - [create_schema](https://docs.getdbt.com/v0.13/reference#adapter-create-schema) - added a `database` argument
  - [drop_schema](https://docs.getdbt.com/v0.13/reference#adapter-drop-schema) - added a `database` argument

### Deprecations
- The following adapter methods are now deprecated, and will be removed in a future release:
  - get_columns_in_table - deprecated in favor of [get_columns_in_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-columns-in-relation)
  - already_exists - deprecated in favor of [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation)

### Features
- Add `source`s to dbt, use them to calculate source data freshness ([docs](https://docs.getdbt.com/v0.13/docs/using-sources)) ([#814](https://github.com/fishtown-analytics/dbt/issues/814), [#1240](https://github.com/fishtown-analytics/dbt/issues/1240))
- Add support for Presto ([docs](https://docs.getdbt.com/v0.13/docs/profile-presto), [repo](https://github.com/fishtown-analytics/dbt-presto)) ([#1106](https://github.com/fishtown-analytics/dbt/issues/1106))
- Add `require-dbt-version` option to `dbt_project.yml` to state the supported versions of dbt for packages ([docs](https://docs.getdbt.com/v0.13/docs/requiring-dbt-versions)) ([#581](https://github.com/fishtown-analytics/dbt/issues/581))
- Add an output line indicating the installed version of dbt to every run ([#1134](https://github.com/fishtown-analytics/dbt/issues/1134))
- Add a new model selector (`@`) which builds models, their children, and their children's parents ([docs](https://docs.getdbt.com/v0.13/reference#section-the-at-operator)) ([#1156](https://github.com/fishtown-analytics/dbt/issues/1156)) (see the example after this list)
- Add support for Snowflake Key Pair Authentication ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-key-pair-authentication)) ([#1232](https://github.com/fishtown-analytics/dbt/pull/1232))
- Support SSO Authentication for Snowflake ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-sso-authentication)) ([#1172](https://github.com/fishtown-analytics/dbt/issues/1172))
- Add support for Snowflake's transient tables ([docs](https://docs.getdbt.com/v0.13/docs/snowflake-configs#section-transient-tables)) ([#946](https://github.com/fishtown-analytics/dbt/issues/946))
- Capture build timing data in `run_results.json` to visualize project performance ([#1179](https://github.com/fishtown-analytics/dbt/issues/1179))
- Add CLI flag to toggle warnings as errors ([docs](https://docs.getdbt.com/v0.13/reference#section-treat-warnings-as-errors)) ([#1243](https://github.com/fishtown-analytics/dbt/issues/1243))
- Add tab completion script for Bash ([docs](https://github.com/fishtown-analytics/dbt-completion.bash)) ([#1197](https://github.com/fishtown-analytics/dbt/issues/1197))
- Added docs on how to build a new adapter ([docs](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter)) ([#560](https://github.com/fishtown-analytics/dbt/issues/560))
- Use new logo ([#1349](https://github.com/fishtown-analytics/dbt/pull/1349))
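For illustration, the new `@` selector is invoked from the command line roughly as shown below. The model name is a hypothetical placeholder; see the linked docs for the authoritative syntax:

```
# build my_model, everything downstream of it, and the parents of those children
dbt run --models @my_model
```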
### Fixes
- Fix for Postgres character columns treated as string types ([#1194](https://github.com/fishtown-analytics/dbt/issues/1194))
- Fix for a hard-to-reach edge case in which dbt could hang ([#1223](https://github.com/fishtown-analytics/dbt/issues/1223))
- Fix for `dbt deps` in non-English shells ([#1222](https://github.com/fishtown-analytics/dbt/issues/1222))
- Fix for overeager schema creation when models are run with `--models` ([#1239](https://github.com/fishtown-analytics/dbt/issues/1239))
- Fix for `dbt seed --show` ([#1288](https://github.com/fishtown-analytics/dbt/issues/1288))
- Fix for `is_incremental()`, which should only return `True` if the target relation is a `table` ([#1292](https://github.com/fishtown-analytics/dbt/issues/1292))
- Fix for error in Snowflake table materializations with custom schemas ([#1316](https://github.com/fishtown-analytics/dbt/issues/1316))
- Fix errored-out concurrent transactions on Redshift and Postgres ([#1356](https://github.com/fishtown-analytics/dbt/pull/1356))
- Fix out-of-order execution on model select ([#1354](https://github.com/fishtown-analytics/dbt/issues/1354), [#1355](https://github.com/fishtown-analytics/dbt/pull/1355))
- Fix adapter macro namespace issue ([#1352](https://github.com/fishtown-analytics/dbt/issues/1352), [#1353](https://github.com/fishtown-analytics/dbt/pull/1353))
- Re-add CLI flag to toggle warnings as errors ([#1347](https://github.com/fishtown-analytics/dbt/pull/1347))
- Fix release candidate regression that runs run hooks on test invocations ([#1346](https://github.com/fishtown-analytics/dbt/pull/1346))
- Fix Snowflake source quoting ([#1338](https://github.com/fishtown-analytics/dbt/pull/1338), [#1317](https://github.com/fishtown-analytics/dbt/issues/1317), [#1332](https://github.com/fishtown-analytics/dbt/issues/1332))
- Handle unexpected max_loaded_at types ([#1330](https://github.com/fishtown-analytics/dbt/pull/1330))

### Under the hood
- Replace all SQL in Python code with Jinja in macros ([#1204](https://github.com/fishtown-analytics/dbt/issues/1204))
- Loosen restrictions of the boto3 dependency ([#1234](https://github.com/fishtown-analytics/dbt/issues/1234))
- Rewrote Postgres introspective queries to be faster on large databases ([#1192](https://github.com/fishtown-analytics/dbt/issues/1192))
### Contributors

Thanks for your contributions to dbt!

- [@patrickgoss](https://github.com/patrickgoss) [#1193](https://github.com/fishtown-analytics/dbt/issues/1193)
- [@brianhartsock](https://github.com/brianhartsock) [#1191](https://github.com/fishtown-analytics/dbt/pull/1191)
- [@alexyer](https://github.com/alexyer) [#1232](https://github.com/fishtown-analytics/dbt/pull/1232)
- [@adriank-convoy](https://github.com/adriank-convoy) [#1224](https://github.com/fishtown-analytics/dbt/pull/1224)
- [@mikekaminsky](https://github.com/mikekaminsky) [#1216](https://github.com/fishtown-analytics/dbt/pull/1216)
- [@vijaykiran](https://github.com/vijaykiran) [#1198](https://github.com/fishtown-analytics/dbt/pull/1198), [#1199](https://github.com/fishtown-analytics/dbt/pull/1199)
## dbt 0.12.2 - Grace Kelly (January 8, 2019)

### Overview

This release reduces the runtime of dbt projects by improving dbt's approach to model running. Additionally, a number of workflow improvements have been added.

### Deprecations
- Deprecate `sql_where` ([#744](https://github.com/fishtown-analytics/dbt/issues/744)) ([docs](https://docs.getdbt.com/v0.12/docs/configuring-incremental-models))

### Features
- More intelligently order and execute nodes in the graph. This _significantly_ speeds up the runtime of most dbt projects ([#813](https://github.com/fishtown-analytics/dbt/issues/813))
- Add `-m` flag as an alias for `--models` ([#1160](https://github.com/fishtown-analytics/dbt/issues/1160)) (see the example after this list)
- Add `post_hook` and `pre_hook` as aliases for `post-hook` and `pre-hook`, respectively ([#1124](https://github.com/fishtown-analytics/dbt/issues/1124)) ([docs](https://docs.getdbt.com/v0.12/docs/using-hooks))
- Better handling of git errors in `dbt deps` + full support for Windows ([#994](https://github.com/fishtown-analytics/dbt/issues/994), [#778](https://github.com/fishtown-analytics/dbt/issues/778), [#895](https://github.com/fishtown-analytics/dbt/issues/895))
- Add support for specifying a `location` in BigQuery datasets ([#969](https://github.com/fishtown-analytics/dbt/issues/969)) ([docs](https://docs.getdbt.com/v0.12/docs/supported-databases#section-dataset-locations))
- Add support for Jinja expressions using the `{% do ... %}` block ([#1113](https://github.com/fishtown-analytics/dbt/issues/1113))
- The `dbt debug` command is actually useful now ([#1061](https://github.com/fishtown-analytics/dbt/issues/1061))
- The `config` function can now be called multiple times in a model ([#558](https://github.com/fishtown-analytics/dbt/issues/558))
- Source the latest version of dbt from PyPI instead of GitHub ([#1122](https://github.com/fishtown-analytics/dbt/issues/1122))
- Add a performance profiling mechanism to dbt ([#1001](https://github.com/fishtown-analytics/dbt/issues/1001))
- Add caching for dbt's macros-only manifest to speed up parsing ([#1098](https://github.com/fishtown-analytics/dbt/issues/1098))
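The `-m` alias is interchangeable with `--models`; for example (the model name here is a hypothetical placeholder):

```
# these two invocations are equivalent
dbt run --models my_model
dbt run -m my_model
```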
### Fixes
- Fix for custom schemas used alongside the `generate_schema_name` macro ([#801](https://github.com/fishtown-analytics/dbt/issues/801))
- Fix for silent failure of tests that reference nonexistent models ([#968](https://github.com/fishtown-analytics/dbt/issues/968))
- Fix for `generate_schema_name` macros that return whitespace-padded schema names ([#1074](https://github.com/fishtown-analytics/dbt/issues/1074))
- Fix for incorrect relation type for backup tables on Snowflake ([#1103](https://github.com/fishtown-analytics/dbt/issues/1103))
- Fix for incorrectly cased values in the relation cache ([#1140](https://github.com/fishtown-analytics/dbt/issues/1140))
- Fix for JSON decoding error on Python 2 installed with Anaconda ([#1155](https://github.com/fishtown-analytics/dbt/issues/1155))
- Fix for unhandled exceptions that occur in anonymous event tracking ([#1180](https://github.com/fishtown-analytics/dbt/issues/1180))
- Fix for analysis files that contain `raw` tags ([#1152](https://github.com/fishtown-analytics/dbt/issues/1152))
- Fix for packages which reference the [hub site](https://hub.getdbt.com) ([#1095](https://github.com/fishtown-analytics/dbt/issues/1095))
## dbt 0.12.1 - (November 15, 2018)

### Overview

This release adds caching for some introspective queries on all adapters.

- Improved error handling and messaging on Redshift ([#997](https://github.com/fishtown-analytics/dbt/issues/997))
- Include datasets with underscores when listing BigQuery datasets ([#954](https://github.com/fishtown-analytics/dbt/pull/954))
- Forgo validating the user's profile for `dbt deps` and `dbt clean` commands ([#947](https://github.com/fishtown-analytics/dbt/issues/947), [#1022](https://github.com/fishtown-analytics/dbt/issues/1022))
- Don't read/parse CSV files outside of the `dbt seed` command ([#1046](https://github.com/fishtown-analytics/dbt/pull/1046))
### Fixes

- Fix for incorrect model selection with the `--models` CLI flag when projects and directories share the same name ([#1023](https://github.com/fishtown-analytics/dbt/issues/1023))
- Fix for unwieldy Jinja errors regarding undefined variables at parse time ([#1086](https://github.com/fishtown-analytics/dbt/pull/1086), [#1080](https://github.com/fishtown-analytics/dbt/issues/1080), [#935](https://github.com/fishtown-analytics/dbt/issues/935))
- Fix for incremental models that have a line comment on the last line of the file ([#1018](https://github.com/fishtown-analytics/dbt/issues/1018))
- Fix for error messages when ephemeral models fail to compile ([#1053](https://github.com/fishtown-analytics/dbt/pull/1053))
### Under the hood
- Create adapters as singleton objects instead of classes ([#961](https://github.com/fishtown-analytics/dbt/issues/961))
- Combine project and profile into a single, coherent object ([#973](https://github.com/fishtown-analytics/dbt/pull/973))
- Investigate approaches for providing more complete compilation output ([#588](https://github.com/fishtown-analytics/dbt/issues/588))

### Contributors
### Breaking Changes
- `adapter` functions must be namespaced to the `adapter` context variable. To fix this error, use `adapter.already_exists` instead of just `already_exists`, or similar for other [adapter functions](https://docs.getdbt.com/docs/adapter).
### Bugfixes

```
Compilation Error in model {your_model} (models/path/to/your_model.sql)
'already_exists' is undefined
```

To fix this error, use `adapter.already_exists` instead of just `already_exists`, or similar for other [adapter functions](https://docs.getdbt.com/docs/adapter).

### Bugfixes
- Handle lingering `__dbt_tmp` relations ([#511](https://github.com/fishtown-analytics/dbt/pull/511))
CONTRIBUTING.md
## About this document

This document is a guide intended for folks interested in contributing to dbt. It is not intended as a guide for end users of dbt (though if it is helpful, that's great!) and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, Python modules, filesystems, and so on. It also assumes you are using macOS or Linux and are comfortable with the command line. If you get stuck while reading this guide, drop us a line in the #development channel on [Slack](https://slack.getdbt.com).
## Getting the code

### Installing git

You will need `git` in order to download and modify the dbt source code. On macOS, the best way to download git is to just install Xcode.

### External contributors

If you are not a member of the `fishtown-analytics` GitHub organization, you can contribute to dbt by forking the dbt repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:

1. fork the dbt repository
2. clone your fork
3. check out a new branch for your proposed changes
4. push changes to your fork
5. open a pull request against `fishtown-analytics/dbt` from your forked repository

### Core contributors

If you are a member of the `fishtown-analytics` GitHub organization, you will have push access to the dbt repo. Rather than forking dbt to make your changes, just clone the repository and push directly to a branch.
## Setting up an environment

To begin developing code in dbt, you should set up the following:

### pyenv

### virtualenv

We strongly recommend setting up [pyenv](https://github.com/pyenv/pyenv) and its [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv) plugin. This setup will make it much easier for you to manage multiple Python projects in the medium to long term.

We strongly recommend using virtual environments when developing code in dbt. We recommend creating this virtualenv in the root of the dbt repository. To create a new virtualenv, run:

```
python3 -m venv env
source env/bin/activate
```

This will create and activate a new Python virtual environment.

### python

By default, `pyenv` has only one python version installed and it's the `system` python - the one that comes with your OS. You don't want that. Instead, use `pyenv install 3.6.5` to install a more recent version. dbt supports up to Python 3.6 at the time of writing (and will soon support Python 3.7).

To get a full (very long!) list of versions available, you can do `pyenv install -l` and look for the versions defined by numbers alone - the others are variants of Python and outside the scope of this document.
### docker and docker-compose

Docker and docker-compose are both used in testing. For macOS, the easiest thing to do is to go [here](https://store.docker.com/editions/community/docker-ce-desktop-mac) and download it. You'll need to make an account. On Linux, you can use one of the packages [here](https://docs.docker.com/install/#server). We recommend installing from docker.com instead of from your package manager. On Linux you also have to install docker-compose separately; follow [these instructions](https://docs.docker.com/compose/install/#install-compose).

### git

You will also need `git` in order to get dbt and contribute code. On macOS, the best way to get that is to just install Xcode.

### GitHub

You will need a GitHub account fully configured with SSH to contribute to dbt. GitHub has [an excellent guide on how to set up SSH](https://help.github.com/articles/connecting-to-github-with-ssh/) -- we strongly recommend you follow their guide if you are unfamiliar with SSH.

### Getting dbt

Now clone dbt to wherever you'd like. For example:

```
mkdir -p ~/git/
cd ~/git
git clone git@github.com:fishtown-analytics/dbt.git
```

But it really does not matter where you put it as long as you remember it.

### Setting up your virtualenv

Set up a fresh virtualenv with pyenv-virtualenv for dbt:

```
pyenv virtualenv 3.6.5 dbt36
cd ~/git/dbt
pyenv local dbt36
```

This makes a new virtualenv based on python 3.6.5 named `dbt36`, and tells pyenv that when you're in the `dbt` directory it should automatically use that virtualenv.

Docker and docker-compose are both used in testing. For macOS, the easiest thing to do is to [download docker for mac](https://store.docker.com/editions/community/docker-ce-desktop-mac). You'll need to make an account. On Linux, you can use one of the packages [here](https://docs.docker.com/install/#server). We recommend installing from docker.com instead of from your package manager. On Linux you also have to install docker-compose separately; follow [these instructions](https://docs.docker.com/compose/install/#install-compose).

### Installing postgres locally (optional)

For testing, and later in the examples in this document, you may want to have `psql` available so you can poke around in the database and see what happened. We recommend that you use [homebrew](https://brew.sh/) for that on macOS, and your package manager on Linux. You can install any version of the postgres client that you'd like. On macOS, with homebrew setup, you can run:

```
brew install postgresql
```
## Running dbt in development

### Installation

First make sure that you set up your `virtualenv` as described in the section _Setting up an environment_. Next, install dbt (and its dependencies) with:

```
pip install -r requirements.txt
```

When dbt is installed from source in this way, any changes you make to the dbt source code will be reflected immediately in your next `dbt` run.

### Running dbt

With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
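For example, a quick sanity check might look like this (the path shown is illustrative and will differ on your machine):

```
$ which dbt
/Users/you/git/dbt/env/bin/dbt
$ dbt --version
# prints the installed dbt version; it should correspond to the source you just checked out
```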
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as necessary to connect to your target databases. It may be a good idea to add a new profile pointing to a local postgres instance, or a specific test sandbox within your data warehouse if appropriate.
## Testing

Getting the dbt integration tests set up in your local environment will be very helpful as you start to make changes to your local version of dbt. The section that follows outlines some helpful tips for setting up the test environment.

A short list of tools used in dbt testing that will be helpful to your understanding:

- [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage dependencies
- [tox](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions
- [nosetests](http://nose.readthedocs.io/en/latest) to discover/run tests
- [pytest](https://docs.pytest.org/en/latest/) to discover/run tests
- [make](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [pep8](https://pep8.readthedocs.io/en/release-1.7.x/) for code linting
- [CircleCI](https://circleci.com/product/) and [Appveyor](https://www.appveyor.com/docs/)
- [flake8](https://gitlab.com/pycqa/flake8) for code linting
- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/)

A deep understanding of these tools is not required to effectively contribute to dbt, but we recommend checking out the attached documentation if you're interested in learning more about them.
Our test environment goes like this:

- CircleCI and Appveyor run `tox`
- `make test` runs `docker-compose`
- `docker-compose` runs `tox`
- `tox` sets up virtualenvs for each distinct set of tests and runs `nosetests`
- `nosetests` finds all the appropriate tests and runs them
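Concretely, those layers bottom out in commands like the one below, taken from the `Makefile` later in this document (the exact list of tox environments there changes over time):

```
# roughly what `make test-unit` runs under the hood
docker-compose run test tox -e unit-py27,unit-py36,pep8
```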
### Running tests via Docker

The basics should work with no further setup. In the terminal, `cd` to the directory where you cloned dbt. So, for example, if you cloned dbt to `~/git/dbt`:

dbt's unit and integration tests run in Docker. Because dbt works with a number of different databases, you will need to supply credentials for one or more of these databases in your test environment. Most organizations don't have access to each of a BigQuery, Redshift, Snowflake, and Postgres database, so it's likely that you will be unable to run every integration test locally. Fortunately, Fishtown Analytics provides a CI environment with access to sandboxed Redshift, Snowflake, BigQuery, and Postgres databases. See the section on _Submitting a Pull Request_ below for more information on this CI setup.

```
cd ~/git/dbt
```

Then you'll want to make a `test.env` file. Fortunately, there's a sample which is fine for our purposes:

#### Specifying your test credentials

dbt uses test credentials specified in a `test.env` file in the root of the repository. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against dbt. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials:

```
cp test.env.sample test.env
atom test.env # supply your credentials
```

We recommend starting with dbt's Postgres tests. These tests cover most of the functionality in dbt, are the fastest to run, and are the easiest to set up. dbt's test suite runs Postgres in a Docker container, so no setup should be required to run these tests. If you additionally want to test Snowflake, BigQuery, or Redshift locally, you'll need to get credentials and add them to the `test.env` file.
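For reference, a `test.env` file is just a list of environment variables. A rough sketch is shown below; the variable names are taken from the CI configuration elsewhere in this repository, the values are placeholders, and `test.env.sample` remains the authoritative list:

```
SNOWFLAKE_TEST_ACCOUNT=<your-account>
SNOWFLAKE_TEST_USER=<your-user>
SNOWFLAKE_TEST_PASSWORD=<your-password>
SNOWFLAKE_TEST_WAREHOUSE=<your-warehouse>
BIGQUERY_SERVICE_ACCOUNT_JSON=<your-service-account-json>
REDSHIFT_TEST_HOST=<your-host>
REDSHIFT_TEST_PORT=5439
REDSHIFT_TEST_USER=<your-user>
REDSHIFT_TEST_PASS=<your-password>
REDSHIFT_TEST_DBNAME=<your-database>
```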
To run the unit tests, use `make test-unit` - it will run the unit tests on python 2.7 and 3.6, and a pep8 linter.

#### Running tests

dbt's unit tests and Python linter can be run with:

```
make test-unit
```

To run the Postgres + Python 3.6 integration tests, you'll have to do one extra step of setting up the test database:

```
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```
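If you have `psql` installed (see _Installing postgres locally_ above), you can confirm that the test database came up correctly before running anything; the connection settings below mirror the ones used throughout this guide:

```
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres psql -c '\dn'
```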
And then to actually run them, you can do `make test-quick`.

If you want to see what exactly is getting run by these commands, look at the `Makefile`. Note that the commands start with an `@`, which you can ignore; it's just makefile magic. If you want to see what the involved `tox` commands are doing, look at the corresponding `tox.ini` section - hopefully it's pretty self-explanatory.

### Running tests in CI

When a contributor to dbt pushes code, GitHub will trigger a series of CI builds on CircleCI and Appveyor (Windows) to test all of dbt's code. The CI builds trigger all the integration tests, not just postgres+python3.6.

The Snowflake tests take a very long time to run (about an hour), so don't just sit around waiting - it'll be a while!

If you open a PR as a non-contributor, these tests won't run automatically. Someone from the dbt team will reach out to you and get them running after reviewing your code.
## Running dbt locally
|
||||
|
||||
Sometimes you're going to have to pretend to be an end user to reproduce bugs. That means manually setting up some things that the test harness normally takes care of for you.
|
||||
|
||||
### installation
|
||||
|
||||
First, from the `dbt` directory, install dbt in 'editable' mode. There are a couple ways to do it, but I'm in the habit of `pip install -e .`, which tells pip to install the package in the current directory in "editable" mode. What's cool about this mode is any changes you make to the current dbt directory will be reflected immediately in your next `dbt` run.
|
||||
|
||||
### Profile
|
||||
|
||||
Now you'll also need a 'dbt profile' so dbt can tell how to connect to your database. By default, this file belongs at `~/.dbt/profiles.yml`, so `mkdir ~/.dbt` and then open your favorite text editor and write out something like this to `~/.dbt/profiles.yml`:
|
||||
To run a quick test for Python3 integration tests on Postgres, you can run:
|
||||
|
||||
```
|
||||
config:
|
||||
send_anonymous_usage_stats: False
|
||||
use_colors: True
|
||||
|
||||
talk:
|
||||
outputs:
|
||||
default:
|
||||
type: postgres
|
||||
threads: 4
|
||||
host: localhost
|
||||
port: 5432
|
||||
user: root
|
||||
pass: password
|
||||
dbname: postgres
|
||||
schema: dbt_postgres
|
||||
target: default
|
||||
make test-quick
|
||||
```
|
||||
|
||||
There's a sample you can look at in the `dbt` folder (`sample.profiles.yml`), but it's got a lot of extra content, and as a developer you probably only want to test against your local postgres container. The basic idea is that there are multiple 'profiles' (`talk`, in this case) and within those each profile has one or more 'targets' (`default`, in this case), and each profile has a default target. You can specify which profile you want to use with the `--profile` flag, and which target with the `--target` flag. If you want to be really snazzy, dbt project files actually specify their target, and if you match up your dbt project `profile` key with your `profiles.yml` profile names you don't have to use `--profile` (and if you like your profile's default target, no need for `--target` either).
|
||||
|
||||
## Example
|
||||
|
||||
There is a very simple project that nicely demonstrates dbt's capabilities; you can get it from GitHub:
|
||||
|
||||
To run tests for a specific database, invoke `tox` directly with the required flags:
|
||||
```
|
||||
cd ~/src/fishtown
|
||||
git clone git@github.com:fishtown-analytics/talk.git
|
||||
git checkout use-postgres
|
||||
# Run Postgres py36 tests
|
||||
docker-compose run test tox -e integration-postgres-py36 -- -x
|
||||
|
||||
# Run Snowflake py36 tests
|
||||
docker-compose run test tox -e integration-snowflake-py36 -- -x
|
||||
|
||||
# Run BigQuery py36 tests
|
||||
docker-compose run test tox -e integration-bigquery-py36 -- -x
|
||||
|
||||
# Run Redshift py36 tests
|
||||
docker-compose run test tox -e integration-redshift-py36 -- -x
|
||||
```
|
||||
|
||||
The `use-postgres` branch configures the project to use your local postgres (instead of the default, Snowflake). You should poke around in this project a bit, particularly the `models` directory.
|
||||
See the `Makefile` contents for some other examples of ways to run `tox`.
|
||||
|
||||
Before doing anything, let's check the database out:
|
||||
### Submitting a Pull Request
|
||||
|
||||
```
|
||||
> PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres psql
|
||||
psql (10.4)
|
||||
Type "help" for help.
|
||||
Fishtown Analytics provides a sandboxed Redshift, Snowflake, and BigQuery database for use in a CI environment.
|
||||
|
||||
postgres=# \dn
|
||||
List of schemas
|
||||
Name | Owner
|
||||
--------+----------
|
||||
public | postgres
|
||||
(1 row)
|
||||
When pull requests are submitted to the `fishtown-analytics/dbt` repo, GitHub will trigger automated tests in CircleCI and Azure Pipelines. If the PR submitter is a member of the `fishtown-analytics` GitHub organization, then the credentials for these databases will be automatically supplied as environment variables in the CI test suite.
|
||||
|
||||
postgres=# \q
|
||||
```
|
||||
**If the PR submitter is not a member of the `fishtown-analytics` organization, then these environment variables will not be automatically supplied in the CI environment**. Once a core maintainer has taken a look at the Pull Request, they will kick off the test suite with the required credentials.
|
||||
|
||||
`\dn` lists schemas in postgres. You can see that we just have the default "public" schema, so we haven't done anything yet.
|
||||
|
||||
|
||||
If you compile your model with `dbt compile` you should see something like this:
|
||||
|
||||
```
|
||||
> dbt compile
|
||||
Found 2 models, 0 tests, 0 archives, 0 analyses, 59 macros, 1 operations, 1 seed files
|
||||
|
||||
09:49:57 | Concurrency: 2 threads (target='default')
|
||||
09:49:57 |
|
||||
09:49:57 | Done.
|
||||
```
|
||||
|
||||
So what does that mean? Well:
|
||||
|
||||
- `2 models` refers to the contents of the `models` directory
|
||||
- `59 macros` are the builtin global macros defined by dbt itself
|
||||
- `1 operations` is the catalog generation operation that runs by default
|
||||
- `1 seed files` refers to the seed data in `data/moby_dick.csv`
|
||||
|
||||
It will create two new folders: One named `dbt_modules`, which is empty for this case, and one named `target`, which has a few things in it:
|
||||
|
||||
- A folder named `compiled`, created by dbt looking at your models and your database schema and filling in references (so `models/moby_dick_base.sql` becomes `target/compiled/talk/moby_dick_base.sql` by replacing the `from {{ ref('moby_dick') }}` with `from "dbt_postgres".moby_dick`)
|
||||
- A file named `graph.gpickle`, which is your project's dependency/reference graph as understood by the `networkx` library.
|
||||
- A file named `catalog.json`, which is the data dbt has collected about your project (macros used, models/seeds used, and parent/child reference maps)
|
||||
|
||||
|
||||
Next, load the seed file into the database with `dbt seed`, it'll look like this:
|
||||
|
||||
```
|
||||
> dbt seed
|
||||
Found 2 models, 0 tests, 0 archives, 0 analyses, 59 macros, 1 operations, 1 seed files
|
||||
|
||||
10:40:46 | Concurrency: 2 threads (target='default')
|
||||
10:40:46 |
|
||||
10:40:46 | 1 of 1 START seed file dbt_postgres.moby_dick........................ [RUN]
|
||||
10:40:47 | 1 of 1 OK loaded seed file dbt_postgres.moby_dick.................... [INSERT 17774 in 0.44s]
|
||||
10:40:47 |
|
||||
10:40:47 | Finished running 1 seeds in 0.65s.
|
||||
|
||||
Completed successfully
|
||||
```
|
||||
|
||||
If you go into postgres now, you can see that you have a new schema ('dbt_postgres'), a new table in that schema ('moby_dick'), and a bunch of stuff in that table:
|
||||
|
||||
```
|
||||
> PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres psql
|
||||
psql (10.4)
|
||||
Type "help" for help.
|
||||
|
||||
postgres=# \dn
|
||||
List of schemas
|
||||
Name | Owner
|
||||
--------------+----------
|
||||
dbt_postgres | root
|
||||
public | postgres
|
||||
(2 rows)
|
||||
|
||||
postgres=# \dt dbt_postgres.*
|
||||
List of relations
|
||||
Schema | Name | Type | Owner
|
||||
--------------+-----------+-------+-------
|
||||
dbt_postgres | moby_dick | table | root
|
||||
(1 row)
|
||||
|
||||
postgres=# select count(*) from dbt_postgres.moby_dick;
|
||||
count
|
||||
-------
|
||||
17774
|
||||
(1 row)
|
||||
|
||||
postgres=# \q
|
||||
```
|
||||
|
||||
If you run `dbt run` now, you'll see something like this:
|
||||
|
||||
```
|
||||
> dbt run
|
||||
Found 2 models, 0 tests, 0 archives, 0 analyses, 59 macros, 1 operations, 1 seed files
|
||||
|
||||
10:19:41 | Concurrency: 2 threads (target='default')
|
||||
10:19:41 |
|
||||
10:19:41 | 1 of 2 START view model dbt_postgres.moby_dick_base.................. [RUN]
|
||||
10:19:41 | 1 of 2 OK created view model dbt_postgres.moby_dick_base............. [CREATE VIEW in 0.05s]
|
||||
10:19:41 | 2 of 2 START table model dbt_postgres.word_count..................... [RUN]
|
||||
10:19:42 | 2 of 2 OK created table model dbt_postgres.word_count................ [SELECT 27390 in 0.19s]
|
||||
10:19:42 |
|
||||
10:19:42 | Finished running 1 view models, 1 table models in 0.53s.
|
||||
|
||||
Completed successfully
|
||||
|
||||
Done. PASS=2 ERROR=0 SKIP=0 TOTAL=2
|
||||
```
|
||||
|
||||
So, some of the same information and then you can see that dbt created a view (`moby_dick_base`) and a table (`word_count`). If you go into postgres, you'll be able to see that!
|
||||
|
||||
If you want to inspect the result, you can do so via psql:
|
||||
|
||||
```
|
||||
> PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres psql
|
||||
psql (10.4)
|
||||
Type "help" for help.
|
||||
|
||||
postgres=# \dt dbt_postgres.*
|
||||
List of relations
|
||||
Schema | Name | Type | Owner
|
||||
--------------+------------+-------+-------
|
||||
dbt_postgres | moby_dick | table | root
|
||||
dbt_postgres | word_count | table | root
|
||||
(2 rows)
|
||||
|
||||
postgres=# select * from dbt_postgres.word_count order by ct desc limit 10;
|
||||
word | ct
|
||||
------+-------
|
||||
the | 13394
|
||||
| 12077
|
||||
of | 6368
|
||||
and | 5846
|
||||
to | 4382
|
||||
a | 4377
|
||||
in | 3767
|
||||
that | 2753
|
||||
his | 2406
|
||||
I | 1826
|
||||
(10 rows)
|
||||
```
|
||||
|
||||
It's pretty much what you'd expect - the most common words are "the", "of", "and", etc. (The empty string probably should not be there, but this is just a toy example!)
|
||||
|
||||
So what happened here? First, `dbt seed` loaded the data in the csv file into postgres. Then `dbt compile` built out a sort of plan for how everything is linked together by looking up references and macros and the current state of the database. And finally, `dbt run` ran the compiled SQL to generate the word_count table.
|
||||
Once your tests are passing and your PR has been reviewed, a dbt maintainer will merge your changes into the active development branch! And that's it! Happy developing :tada:
|
||||
|
||||
Dockerfile
FROM python:3.6
|
||||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt-get update
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
RUN apt-get install -y python-pip netcat
|
||||
RUN apt-get install -y python-dev python3-dev
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
netcat postgresql make build-essential libssl-dev zlib1g-dev \
|
||||
libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev \
|
||||
xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev git ca-certificates \
|
||||
curl git ssh && \
|
||||
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
RUN pip install pip --upgrade
|
||||
RUN pip install virtualenv
|
||||
RUN pip install virtualenvwrapper
|
||||
RUN pip install tox
|
||||
RUN useradd -mU dbt_test_user
|
||||
RUN mkdir /usr/app && chown dbt_test_user /usr/app
|
||||
RUN mkdir /home/tox && chown dbt_test_user /home/tox
|
||||
USER dbt_test_user
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
RUN cd /usr/src/app
|
||||
WORKDIR /usr/app
|
||||
VOLUME /usr/app
|
||||
|
||||
RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
|
||||
|
||||
ENV PYENV_ROOT="/home/dbt_test_user/.pyenv" \
|
||||
PATH="/home/dbt_test_user/.pyenv/bin:/home/dbt_test_user/.pyenv/shims:$PATH"
|
||||
|
||||
RUN pyenv update && \
|
||||
echo "2.7.16 3.6.8 3.7.3" | xargs -P 4 -n 1 pyenv install && \
|
||||
pyenv global $(pyenv versions --bare)
|
||||
|
||||
RUN pyenv virtualenv 3.6.8 dbt36 && \
|
||||
pyenv virtualenv 3.7.3 dbt37 && \
|
||||
pyenv virtualenv 2.7.16 dbt27
|
||||
|
||||
RUN cd /usr/app && \
|
||||
python -m pip install -U pip && \
|
||||
python -m pip install tox && \
|
||||
pyenv local dbt37 && \
|
||||
python -m pip install -U pip && \
|
||||
python -m pip install tox && \
|
||||
pyenv local --unset && \
|
||||
pyenv local dbt36 && \
|
||||
python -m pip install -U pip && \
|
||||
python -m pip install tox && \
|
||||
pyenv local --unset && \
|
||||
pyenv local dbt27 && \
|
||||
python -m pip install -U pip && \
|
||||
python -m pip install tox && \
|
||||
pyenv local --unset && \
|
||||
pyenv rehash
|
||||
|
||||
RUN pyenv local dbt36 dbt37 dbt27
|
||||
|
||||
ENV PYTHONIOENCODING=utf-8
|
||||
|
||||
Makefile
test-unit:
|
||||
@echo "Unit test run starting..."
|
||||
@time docker-compose run test tox -e unit-py27,unit-py36,pep8
|
||||
@time docker-compose run test tox -e unit-py27,unit-py36,flake8
|
||||
|
||||
test-integration:
|
||||
@echo "Integration test run starting..."
|
||||
|
||||
README.md
# dbt
|
||||
<p align="center">
|
||||
<img src="https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-horizontal.png?raw=true" alt="dbt logo"/>
|
||||
</p>
|
||||
<p align="center">
|
||||
<a href="https://codeclimate.com/github/fishtown-analytics/dbt">
|
||||
<img src="https://codeclimate.com/github/fishtown-analytics/dbt/badges/gpa.svg" alt="Code Climate"/>
|
||||
</a>
|
||||
<a href="https://circleci.com/gh/fishtown-analytics/dbt/tree/master">
|
||||
<img src="https://circleci.com/gh/fishtown-analytics/dbt/tree/master.svg?style=svg" alt="CircleCI" />
|
||||
</a>
|
||||
<a href="https://ci.appveyor.com/project/DrewBanin/dbt/branch/development">
|
||||
<img src="https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/development?svg=true" alt="AppVeyor" />
|
||||
</a>
|
||||
<a href="https://slack.getdbt.com">
|
||||
<img src="https://slack.getdbt.com/badge.svg" alt="Slack" />
|
||||
</a>
|
||||
</p>
|
||||
|
||||
dbt (data build tool) helps analysts write reliable, modular code using a workflow that closely mirrors software development.
|
||||
**[dbt](https://www.getdbt.com/)** (data build tool) enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
|
||||
|
||||
A dbt project primarily consists of "models". These models are SQL `select` statements that filter, aggregate, and otherwise transform data to facilitate analytics. Analysts use dbt to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
|
||||
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.
|
||||
|
||||
These models frequently build on top of one another. Fortunately, dbt makes it easy to [manage relationships](https://docs.getdbt.com/reference#ref) between models, [test](https://docs.getdbt.com/docs/testing) your assumptions, and [visualize](https://graph.sinterdata.com/) your projects.
|
||||

|
||||
|
||||
Still reading? Check out the [docs](https://docs.getdbt.com/docs/overview) for more information.
|
||||
dbt can be used to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
|
||||
|
||||

|
||||
## Understanding dbt
|
||||
|
||||
---
|
||||
### Getting Started
|
||||
Analysts using dbt can transform their data by simply writing select statements, while dbt handles turning these statements into tables and views in a data warehouse.
|
||||
|
||||
- [What is dbt]?
|
||||
- Read the [dbt viewpoint]
|
||||
- [Installation]
|
||||
- Join the [chat][slack-url] on Slack for live questions and support.
|
||||
These select statements, or "models", form a dbt project. Models frequently build on top of one another – dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
|
||||
|
||||
---
|
||||
### The dbt ecosystem
|
||||
- Visualize your dbt graph [here](https://graph.sinterdata.com/)
|
||||
- Run your dbt projects on a schedule [here](http://sinterdata.com/)
|
||||

|
||||
|
||||
---
|
||||
## Getting started
|
||||
|
||||
[](https://codeclimate.com/github/fishtown-analytics/dbt) [](https://slack.getdbt.com)
|
||||
- [Install dbt](https://docs.getdbt.com/docs/installation)
|
||||
- Read the [documentation](https://docs.getdbt.com/).
|
||||
- Productionize your dbt project with [dbt Cloud](https://www.getdbt.com)
|
||||
|
||||
### Testing
|
||||
## Find out more
|
||||
|
||||
| service | development | master |
|
||||
| --- | --- | --- |
|
||||
| CircleCI| [](https://circleci.com/gh/fishtown-analytics/dbt/tree/development) | [](https://circleci.com/gh/fishtown-analytics/dbt/tree/master) |
|
||||
| AppVeyor | [](https://ci.appveyor.com/project/DrewBanin/dbt/branch/development) | [](https://ci.appveyor.com/project/DrewBanin/dbt/branch/master) |
|
||||
- Check out the [Introduction to dbt](https://dbt.readme.io/docs/introduction).
|
||||
- Read the [dbt Viewpoint](https://dbt.readme.io/docs/viewpoint).
|
||||
|
||||
[Coverage](https://circleci.com/api/v1/project/fishtown-analytics/dbt/latest/artifacts/0/$CIRCLE_ARTIFACTS/htmlcov/index.html?branch=development)
|
||||
## Join thousands of analysts in the dbt community
|
||||
|
||||
- Join the [chat](http://slack.getdbt.com/) on Slack.
|
||||
- Find community posts on [dbt Discourse](https://discourse.getdbt.com).
|
||||
|
||||
## Reporting bugs and contributing code
|
||||
|
||||
- Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt/issues/new).
|
||||
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](/CONTRIBUTING.md)
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct].
|
||||
|
||||
|
||||
|
||||
[PyPA Code of Conduct]: https://www.pypa.io/en/latest/code-of-conduct/
|
||||
[slack-url]: https://slack.getdbt.com/
|
||||
[Installation]: https://docs.getdbt.com/docs/installation
|
||||
[What is dbt]: https://docs.getdbt.com/docs/overview
|
||||
[dbt viewpoint]: https://docs.getdbt.com/docs/viewpoint
|
||||
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/).
|
||||
|
||||
RELEASE.md
#### Git & PyPI
|
||||
|
||||
1. Update CHANGELOG.md with the most recent changes
|
||||
2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to master. Open a Pull Request in Github to merge it.
|
||||
2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it into the appropriate trunk (`X.X.latest`)
|
||||
3. Bump the version using `bumpversion`:
|
||||
- Dry run first by running `bumpversion --new-version <desired-version> <part>` and checking the diff. If it looks correct, clean up the changes and move on:
|
||||
- Alpha releases: `bumpversion --commit --tag --new-version 0.10.2a1 num`
|
||||
- Patch releases: `bumpversion --commit --tag --new-version 0.10.2 patch`
|
||||
- Minor releases: `bumpversion --commit --tag --new-version 0.11.0 minor`
|
||||
- Major releases: `bumpversion --commit --tag --new-version 1.0.0 major`
|
||||
4. Deploy to pypi
|
||||
- `python setup.py sdist upload -r pypi`
|
||||
5. Deploy to homebrew (see below)
|
||||
6. Deploy to conda-forge (see below)
|
||||
7. Git release notes (points to changelog)
|
||||
8. Post to slack (point to changelog)
|
||||
- Alpha releases: `bumpversion --commit --no-tag --new-version 0.10.2a1 num`
|
||||
- Patch releases: `bumpversion --commit --no-tag --new-version 0.10.2 patch`
|
||||
- Minor releases: `bumpversion --commit --no-tag --new-version 0.11.0 minor`
|
||||
- Major releases: `bumpversion --commit --no-tag --new-version 1.0.0 major`
|
||||
4. (If this is a not a release candidate) Merge to `x.x.latest` and (optionally) `master`.
|
||||
5. Update the default branch to the next dev release branch.
|
||||
6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!!
|
||||
7. Deploy to pypi
|
||||
- `twine upload dist/*`
|
||||
8. Deploy to homebrew (see below)
|
||||
9. Deploy to conda-forge (see below)
|
||||
10. Git release notes (points to changelog)
|
||||
11. Post to slack (point to changelog)
|
||||
|
||||
After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward.
|
||||
|
||||
|
||||
appveyor.yml
version: 1.0.{build}-{branch}
|
||||
|
||||
environment:
|
||||
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
|
||||
# /E:ON and /V:ON options are not enabled in the batch script interpreter
|
||||
# See: http://stackoverflow.com/a/13751649/163740
|
||||
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
|
||||
TOX_ENV: "pywin"
|
||||
|
||||
matrix:
|
||||
- PYTHON: "C:\\Python35"
|
||||
PYTHON_VERSION: "3.5.2"
|
||||
PYTHON_ARCH: "32"
|
||||
|
||||
#- PYTHON: "C:\\Python35"
|
||||
# PYTHON_VERSION: "3.5.2"
|
||||
# PYTHON_ARCH: "32"
|
||||
|
||||
PGUSER: postgres
|
||||
PGPASSWORD: Password12!
|
||||
|
||||
services:
|
||||
- postgresql94
|
||||
|
||||
hosts:
|
||||
database: 127.0.0.1
|
||||
|
||||
init:
|
||||
- PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH%
|
||||
- ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all ::1/128 trust"
|
||||
- ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"
|
||||
|
||||
install:
|
||||
# Download setup scripts and unzip
|
||||
- ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip"
|
||||
- "7z e master.zip */appveyor/* -oappveyor"
|
||||
|
||||
# Install Python (from the official .msi of http://python.org) and pip when
|
||||
# not already installed.
|
||||
- "powershell ./appveyor/install.ps1"
|
||||
|
||||
# Prepend newly installed Python to the PATH of this build (this cannot be
|
||||
# done from inside the powershell script as it would require to restart
|
||||
# the parent CMD process).
|
||||
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
|
||||
|
||||
# Check that we have the expected version and architecture for Python
|
||||
- "python --version"
|
||||
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""
|
||||
|
||||
build: false # Not a C# project, build stuff at the test step instead.
|
||||
|
||||
before_test:
|
||||
- "%CMD_IN_ENV% pip install psycopg2==2.6.2"
|
||||
- "%CMD_IN_ENV% pip install tox"
|
||||
|
||||
test_script:
|
||||
- "bash test/setup_db.sh"
|
||||
|
||||
# this is generally a bad idea TODO
|
||||
- git config --system http.sslverify false
|
||||
|
||||
- "%CMD_IN_ENV% tox -e %TOX_ENV%"
|
||||
azure-pipelines.yml
# Python package
|
||||
# Create and test a Python package on multiple Python versions.
|
||||
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
|
||||
# https://docs.microsoft.com/azure/devops/pipelines/languages/python
|
||||
|
||||
trigger:
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
- dev/*
|
||||
- pr/*
|
||||
|
||||
jobs:
|
||||
- job: UnitTest
|
||||
pool:
|
||||
vmImage: 'vs2017-win2016'
|
||||
steps:
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.5'
|
||||
architecture: 'x64'
|
||||
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-unit
|
||||
displayName: Run unit tests
|
||||
|
||||
- job: PostgresIntegrationTest
|
||||
pool:
|
||||
vmImage: 'vs2017-win2016'
|
||||
dependsOn: UnitTest
|
||||
|
||||
steps:
|
||||
- pwsh: |
|
||||
choco install postgresql --params '/Password:password' --params-global --version 10.6
|
||||
Set-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all ::1/128 trust"
|
||||
Add-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"
|
||||
# the service name is "postgresql-x64-10", conveniently it's both the display name and the actual name
|
||||
Restart-Service postgresql-x64-10
|
||||
|
||||
& "C:\program files\postgresql\10\bin\createdb.exe" -U postgres dbt
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE root WITH PASSWORD 'password';"
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE root WITH LOGIN;"
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;"
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;"
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE noaccess WITH LOGIN;"
|
||||
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CONNECT ON DATABASE dbt TO noaccess;"
|
||||
displayName: Install postgresql and set up database
|
||||
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.5'
|
||||
architecture: 'x64'
|
||||
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-postgres
|
||||
displayName: Run integration tests
|
||||
|
||||
# These three are all similar except secure environment variables, which MUST be passed along to their tasks,
|
||||
# but there's probably a better way to do this!
|
||||
- job: SnowflakeIntegrationTest
|
||||
pool:
|
||||
vmImage: 'vs2017-win2016'
|
||||
dependsOn: PostgresIntegrationTest
|
||||
|
||||
steps:
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.5'
|
||||
architecture: 'x64'
|
||||
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-snowflake
|
||||
env:
|
||||
SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT)
|
||||
SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD)
|
||||
SNOWFLAKE_TEST_USER: $(SNOWFLAKE_TEST_USER)
|
||||
SNOWFLAKE_TEST_WAREHOUSE: $(SNOWFLAKE_TEST_WAREHOUSE)
|
||||
displayName: Run integration tests
|
||||
|
||||
- job: BigQueryIntegrationTest
|
||||
pool:
|
||||
vmImage: 'vs2017-win2016'
|
||||
dependsOn: PostgresIntegrationTest
|
||||
|
||||
steps:
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.5'
|
||||
architecture: 'x64'
|
||||
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
- script: python -m tox -e pywin-bigquery
|
||||
env:
|
||||
BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON)
|
||||
displayName: Run integration tests
|
||||
|
||||
- job: RedshiftIntegrationTest
|
||||
pool:
|
||||
vmImage: 'vs2017-win2016'
|
||||
dependsOn: PostgresIntegrationTest
|
||||
|
||||
steps:
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.5'
|
||||
architecture: 'x64'
|
||||
|
||||
- script: python -m pip install --upgrade pip && pip install tox
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: python -m tox -e pywin-redshift
|
||||
env:
|
||||
REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME)
|
||||
REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS)
|
||||
REDSHIFT_TEST_USER: $(REDSHIFT_TEST_USER)
|
||||
REDSHIFT_TEST_PORT: $(REDSHIFT_TEST_PORT)
|
||||
REDSHIFT_TEST_HOST: $(REDSHIFT_TEST_HOST)
|
||||
displayName: Run integration tests
|
||||
core/dbt/__init__.py
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
||||
core/dbt/adapters/__init__.py
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
||||
core/dbt/adapters/base/__init__.py
# these are all just exports, #noqa them so flake8 will be happy
|
||||
from dbt.adapters.base.meta import available # noqa
|
||||
from dbt.adapters.base.relation import BaseRelation # noqa
|
||||
from dbt.adapters.base.relation import Column # noqa
|
||||
from dbt.adapters.base.connections import BaseConnectionManager # noqa
|
||||
from dbt.adapters.base.connections import Credentials # noqa
|
||||
from dbt.adapters.base.impl import BaseAdapter # noqa
|
||||
from dbt.adapters.base.plugin import AdapterPlugin # noqa
|
||||
core/dbt/adapters/base/connections.py
import abc
|
||||
import multiprocessing
|
||||
import os
|
||||
|
||||
import six
|
||||
|
||||
import dbt.exceptions
|
||||
import dbt.flags
|
||||
from dbt.api import APIObject
|
||||
from dbt.compat import abstractclassmethod, get_ident
|
||||
from dbt.contracts.connection import Connection
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.utils import translate_aliases
|
||||
|
||||
|
||||
class Credentials(APIObject):
|
||||
"""Common base class for credentials. This is not valid to instantiate"""
|
||||
SCHEMA = NotImplemented
|
||||
# map credential aliases to their canonical names.
|
||||
ALIASES = {}
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
renamed = self.translate_aliases(kwargs)
|
||||
super(Credentials, self).__init__(**renamed)
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
raise NotImplementedError(
|
||||
'type not implemented for base credentials class'
|
||||
)
|
||||
|
||||
def connection_info(self):
|
||||
"""Return an ordered iterator of key/value pairs for pretty-printing.
|
||||
"""
|
||||
for key in self._connection_keys():
|
||||
if key in self._contents:
|
||||
yield key, self._contents[key]
|
||||
|
||||
def _connection_keys(self):
|
||||
"""The credential object keys that should be printed to users in
|
||||
'dbt debug' output. This is specific to each adapter.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def incorporate(self, **kwargs):
|
||||
# implementation note: we have to do this here, or
|
||||
# incorporate(alias_name=...) will result in duplicate keys in the
|
||||
# merged dict that APIObject.incorporate() creates.
|
||||
renamed = self.translate_aliases(kwargs)
|
||||
return super(Credentials, self).incorporate(**renamed)
|
||||
|
||||
def serialize(self, with_aliases=False):
|
||||
serialized = super(Credentials, self).serialize()
|
||||
if with_aliases:
|
||||
serialized.update({
|
||||
new_name: serialized[canonical_name]
|
||||
for new_name, canonical_name in self.ALIASES.items()
|
||||
if canonical_name in serialized
|
||||
})
|
||||
return serialized
|
||||
|
||||
@classmethod
|
||||
def translate_aliases(cls, kwargs):
|
||||
return translate_aliases(kwargs, cls.ALIASES)
|
||||
|
||||
|
||||
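A hedged sketch of how a concrete adapter subclasses Credentials, showing how ALIASES feeds translate_aliases() so a profile may spell a field either way (the schema and field names here are illustrative, not part of this diff):

class MyDBCredentials(Credentials):
    SCHEMA = {
        'type': 'object',
        'properties': {
            'host': {'type': 'string'},
            'user': {'type': 'string'},
            'password': {'type': 'string'},
            'database': {'type': 'string'},
            'schema': {'type': 'string'},
        },
        'required': ['host', 'user', 'password', 'database', 'schema'],
    }
    # translate_aliases() rewrites 'pass' to 'password' before validation,
    # so profiles.yml may use either spelling
    ALIASES = {'pass': 'password'}

    @property
    def type(self):
        return 'mydb'

    def _connection_keys(self):
        # shown by `dbt debug`; deliberately excludes the password
        return ('host', 'user', 'database', 'schema')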
@six.add_metaclass(abc.ABCMeta)
|
||||
class BaseConnectionManager(object):
|
||||
"""Methods to implement:
|
||||
- exception_handler
|
||||
- cancel_open
|
||||
- open
|
||||
- begin
|
||||
- commit
|
||||
- clear_transaction
|
||||
- execute
|
||||
|
||||
You must also set the 'TYPE' class attribute with a class-unique constant
|
||||
string.
|
||||
"""
|
||||
TYPE = NotImplemented
|
||||
|
||||
def __init__(self, profile):
|
||||
self.profile = profile
|
||||
self.thread_connections = {}
|
||||
self.lock = multiprocessing.RLock()
|
||||
|
||||
@staticmethod
|
||||
def get_thread_identifier():
|
||||
# note that get_ident() may be re-used, but we should never experience
|
||||
# that within a single process
|
||||
return (os.getpid(), get_ident())
|
||||
|
||||
def get_thread_connection(self):
|
||||
key = self.get_thread_identifier()
|
||||
with self.lock:
|
||||
if key not in self.thread_connections:
|
||||
raise RuntimeError(
|
||||
'connection never acquired for thread {}, have {}'
|
||||
.format(key, list(self.thread_connections))
|
||||
)
|
||||
return self.thread_connections[key]
|
||||
|
||||
def get_if_exists(self):
|
||||
key = self.get_thread_identifier()
|
||||
with self.lock:
|
||||
return self.thread_connections.get(key)
|
||||
|
||||
def clear_thread_connection(self):
|
||||
key = self.get_thread_identifier()
|
||||
with self.lock:
|
||||
if key in self.thread_connections:
|
||||
del self.thread_connections[key]
|
||||
|
||||
def clear_transaction(self):
|
||||
"""Clear any existing transactions."""
|
||||
conn = self.get_thread_connection()
|
||||
if conn is not None:
|
||||
if conn.transaction_open:
|
||||
self._rollback(conn)
|
||||
self.begin()
|
||||
self.commit()
|
||||
|
||||
@abc.abstractmethod
|
||||
def exception_handler(self, sql):
|
||||
"""Create a context manager that handles exceptions caused by database
|
||||
interactions.
|
||||
|
||||
:param str sql: The SQL string that the block inside the context
|
||||
manager is executing.
|
||||
:return: A context manager that handles exceptions raised by the
|
||||
underlying database.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`exception_handler` is not implemented for this adapter!')
|
||||
|
||||
def set_connection_name(self, name=None):
|
||||
if name is None:
|
||||
# if a name isn't specified, we'll re-use a single handle
|
||||
# named 'master'
|
||||
name = 'master'
|
||||
|
||||
conn = self.get_if_exists()
|
||||
thread_id_key = self.get_thread_identifier()
|
||||
|
||||
if conn is None:
|
||||
conn = Connection(
|
||||
type=self.TYPE,
|
||||
name=None,
|
||||
state='init',
|
||||
transaction_open=False,
|
||||
handle=None,
|
||||
credentials=self.profile.credentials
|
||||
)
|
||||
self.thread_connections[thread_id_key] = conn
|
||||
|
||||
if conn.name == name and conn.state == 'open':
|
||||
return conn
|
||||
|
||||
logger.debug('Acquiring new {} connection "{}".'
|
||||
.format(self.TYPE, name))
|
||||
|
||||
if conn.state == 'open':
|
||||
logger.debug(
|
||||
'Re-using an available connection from the pool (formerly {}).'
|
||||
.format(conn.name))
|
||||
else:
|
||||
logger.debug('Opening a new connection, currently in state {}'
|
||||
.format(conn.state))
|
||||
self.open(conn)
|
||||
|
||||
conn.name = name
|
||||
return conn
|
||||
|
||||
@abc.abstractmethod
|
||||
def cancel_open(self):
|
||||
"""Cancel all open connections on the adapter. (passable)"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`cancel_open` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abstractclassmethod
|
||||
def open(cls, connection):
|
||||
"""Open a connection on the adapter.
|
||||
|
||||
This may mutate the given connection (in particular, its state and its
|
||||
handle).
|
||||
|
||||
This should be thread-safe, or hold the lock if necessary. The given
|
||||
connection should not be in either in_use or available.
|
||||
|
||||
:param Connection connection: A connection object to open.
|
||||
:return: A connection with a handle attached and an 'open' state.
|
||||
:rtype: Connection
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`open` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
def release(self):
|
||||
with self.lock:
|
||||
conn = self.get_if_exists()
|
||||
if conn is None:
|
||||
return
|
||||
|
||||
try:
|
||||
if conn.state == 'open':
|
||||
if conn.transaction_open is True:
|
||||
self._rollback(conn)
|
||||
else:
|
||||
self.close(conn)
|
||||
except Exception:
|
||||
# if rollback or close failed, remove our busted connection
|
||||
self.clear_thread_connection()
|
||||
raise
|
||||
|
||||
def cleanup_all(self):
|
||||
with self.lock:
|
||||
for connection in self.thread_connections.values():
|
||||
if connection.state not in {'closed', 'init'}:
|
||||
logger.debug("Connection '{}' was left open."
|
||||
.format(connection.name))
|
||||
else:
|
||||
logger.debug("Connection '{}' was properly closed."
|
||||
.format(connection.name))
|
||||
self.close(connection)
|
||||
|
||||
# garbage collect these connections
|
||||
self.thread_connections.clear()
|
||||
|
||||
@abc.abstractmethod
|
||||
def begin(self):
|
||||
"""Begin a transaction. (passable)
|
||||
|
||||
:param str name: The name of the connection to use.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`begin` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
def commit(self):
|
||||
"""Commit a transaction. (passable)"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`commit` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _rollback_handle(cls, connection):
|
||||
"""Perform the actual rollback operation."""
|
||||
try:
|
||||
connection.handle.rollback()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
'Failed to rollback {}'.format(connection.name),
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _close_handle(cls, connection):
|
||||
"""Perform the actual close operation."""
|
||||
# On windows, sometimes connection handles don't have a close() attr.
|
||||
if hasattr(connection.handle, 'close'):
|
||||
logger.debug('On {}: Close'.format(connection.name))
|
||||
connection.handle.close()
|
||||
else:
|
||||
logger.debug('On {}: No close available on handle'
|
||||
.format(connection.name))
|
||||
|
||||
@classmethod
|
||||
def _rollback(cls, connection):
|
||||
"""Roll back the given connection.
|
||||
"""
|
||||
if dbt.flags.STRICT_MODE:
|
||||
assert isinstance(connection, Connection)
|
||||
|
||||
if connection.transaction_open is False:
|
||||
raise dbt.exceptions.InternalException(
|
||||
'Tried to rollback transaction on connection "{}", but '
|
||||
'it does not have one open!'.format(connection.name))
|
||||
|
||||
logger.debug('On {}: ROLLBACK'.format(connection.name))
|
||||
cls._rollback_handle(connection)
|
||||
|
||||
connection.transaction_open = False
|
||||
|
||||
return connection
|
||||
|
||||
@classmethod
|
||||
def close(cls, connection):
|
||||
if dbt.flags.STRICT_MODE:
|
||||
assert isinstance(connection, Connection)
|
||||
|
||||
# if the connection is in closed or init, there's nothing to do
|
||||
if connection.state in {'closed', 'init'}:
|
||||
return connection
|
||||
|
||||
if connection.transaction_open and connection.handle:
|
||||
cls._rollback_handle(connection)
|
||||
connection.transaction_open = False
|
||||
|
||||
cls._close_handle(connection)
|
||||
connection.state = 'closed'
|
||||
|
||||
return connection
|
||||
|
||||
def commit_if_has_connection(self):
|
||||
"""If the named connection exists, commit the current transaction.
|
||||
|
||||
:param str name: The name of the connection to use.
|
||||
"""
|
||||
connection = self.get_if_exists()
|
||||
if connection:
|
||||
self.commit()
|
||||
|
||||
@abc.abstractmethod
|
||||
def execute(self, sql, auto_begin=False, fetch=False):
|
||||
"""Execute the given SQL.
|
||||
|
||||
:param str sql: The sql to execute.
|
||||
:param bool auto_begin: If set, and dbt is not currently inside a
|
||||
transaction, automatically begin one.
|
||||
:param bool fetch: If set, fetch results.
|
||||
:return: A tuple of the status and the results (empty if fetch=False).
|
||||
:rtype: Tuple[str, agate.Table]
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`execute` is not implemented for this adapter!'
|
||||
)
|
||||
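For orientation, a minimal sketch of a concrete connection manager built on the base class above, assuming a DB-API 2.0 style driver (the mydb_driver module is hypothetical and the error handling is deliberately simplified):

from contextlib import contextmanager

import mydb_driver  # hypothetical DB-API 2.0 driver

import dbt.exceptions
from dbt.adapters.base import BaseConnectionManager


class MyDBConnectionManager(BaseConnectionManager):
    TYPE = 'mydb'

    @contextmanager
    def exception_handler(self, sql):
        try:
            yield
        except Exception as exc:
            self.release()
            raise dbt.exceptions.RuntimeException(str(exc))

    @classmethod
    def open(cls, connection):
        if connection.state == 'open':
            return connection
        # the keyword mapping here is illustrative; a real adapter translates
        # its Credentials fields into whatever the driver expects
        connection.handle = mydb_driver.connect(**connection.credentials.serialize())
        connection.state = 'open'
        return connection

    def cancel_open(self):
        pass  # passable: this sketch has nothing to cancel

    def begin(self):
        conn = self.get_thread_connection()
        conn.handle.cursor().execute('BEGIN')
        conn.transaction_open = True

    def commit(self):
        conn = self.get_thread_connection()
        conn.handle.commit()
        conn.transaction_open = False

    def execute(self, sql, auto_begin=False, fetch=False):
        conn = self.get_thread_connection()
        with self.exception_handler(sql):
            cursor = conn.handle.cursor()
            cursor.execute(sql)
        # a real adapter returns (status, agate.Table); the empty list is a stand-in
        return 'OK', []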
957 core/dbt/adapters/base/impl.py Normal file
@@ -0,0 +1,957 @@
import abc
|
||||
from contextlib import contextmanager
|
||||
|
||||
import agate
|
||||
import pytz
|
||||
import six
|
||||
|
||||
import dbt.exceptions
|
||||
import dbt.flags
|
||||
import dbt.clients.agate_helper
|
||||
|
||||
from dbt.compat import abstractclassmethod, classmethod
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.loader import GraphLoader
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.utils import filter_null_values
|
||||
|
||||
|
||||
from dbt.adapters.base.meta import AdapterMeta, available
|
||||
from dbt.adapters.base import BaseRelation
|
||||
from dbt.adapters.base import Column
|
||||
from dbt.adapters.cache import RelationsCache
|
||||
|
||||
|
||||
GET_CATALOG_MACRO_NAME = 'get_catalog'
|
||||
FRESHNESS_MACRO_NAME = 'collect_freshness'
|
||||
|
||||
|
||||
def _expect_row_value(key, row):
|
||||
if key not in row.keys():
|
||||
raise dbt.exceptions.InternalException(
|
||||
'Got a row without "{}" column, columns: {}'
|
||||
.format(key, row.keys())
|
||||
)
|
||||
return row[key]
|
||||
|
||||
|
||||
def _relations_filter_schemas(schemas):
|
||||
def test(row):
|
||||
referenced_schema = _expect_row_value('referenced_schema', row)
|
||||
dependent_schema = _expect_row_value('dependent_schema', row)
|
||||
# handle the null schema
|
||||
if referenced_schema is not None:
|
||||
referenced_schema = referenced_schema.lower()
|
||||
if dependent_schema is not None:
|
||||
dependent_schema = dependent_schema.lower()
|
||||
return referenced_schema in schemas or dependent_schema in schemas
|
||||
return test
|
||||
|
||||
|
||||
def _catalog_filter_schemas(manifest):
|
||||
"""Return a function that takes a row and decides if the row should be
|
||||
included in the catalog output.
|
||||
"""
|
||||
schemas = frozenset((d.lower(), s.lower())
|
||||
for d, s in manifest.get_used_schemas())
|
||||
|
||||
def test(row):
|
||||
table_database = _expect_row_value('table_database', row)
|
||||
table_schema = _expect_row_value('table_schema', row)
|
||||
# the schema may be present but None, which is not an error and should
|
||||
# be filtered out
|
||||
if table_schema is None:
|
||||
return False
|
||||
return (table_database.lower(), table_schema.lower()) in schemas
|
||||
return test
|
||||
|
||||
|
||||
def _utc(dt, source, field_name):
|
||||
"""If dt has a timezone, return a new datetime that's in UTC. Otherwise,
|
||||
assume the datetime is already for UTC and add the timezone.
|
||||
"""
|
||||
if dt is None:
|
||||
raise dbt.exceptions.raise_database_error(
|
||||
"Expected a non-null value when querying field '{}' of table "
|
||||
" {} but received value 'null' instead".format(
|
||||
field_name,
|
||||
source))
|
||||
|
||||
elif not hasattr(dt, 'tzinfo'):
|
||||
raise dbt.exceptions.raise_database_error(
|
||||
"Expected a timestamp value when querying field '{}' of table "
|
||||
"{} but received value of type '{}' instead".format(
|
||||
field_name,
|
||||
source,
|
||||
type(dt).__name__))
|
||||
|
||||
elif dt.tzinfo:
|
||||
return dt.astimezone(pytz.UTC)
|
||||
else:
|
||||
return dt.replace(tzinfo=pytz.UTC)
|
||||
|
||||
|
||||
class SchemaSearchMap(dict):
|
||||
"""A utility class to keep track of what information_schema tables to
|
||||
search for what schemas
|
||||
"""
|
||||
def add(self, relation):
|
||||
key = relation.information_schema_only()
|
||||
if key not in self:
|
||||
self[key] = set()
|
||||
self[key].add(relation.schema.lower())
|
||||
|
||||
def search(self):
|
||||
for information_schema_name, schemas in self.items():
|
||||
for schema in schemas:
|
||||
yield information_schema_name, schema
|
||||
|
||||
def schemas_searched(self):
|
||||
result = set()
|
||||
for information_schema_name, schemas in self.items():
|
||||
result.update(
|
||||
(information_schema_name.database, schema)
|
||||
for schema in schemas
|
||||
)
|
||||
return result
|
||||
|
||||
def flatten(self):
|
||||
new = self.__class__()
|
||||
|
||||
database = None
|
||||
# iterate once to look for a database name
|
||||
seen = {r.database.lower() for r in self if r.database}
|
||||
if len(seen) > 1:
|
||||
dbt.exceptions.raise_compiler_error(str(seen))
|
||||
elif len(seen) == 1:
|
||||
database = list(seen)[0]
|
||||
|
||||
for information_schema_name, schema in self.search():
|
||||
new.add(information_schema_name.incorporate(
|
||||
path={'database': database, 'schema': schema},
|
||||
quote_policy={'database': False},
|
||||
include_policy={'database': False},
|
||||
))
|
||||
|
||||
return new
|
||||
|
||||
|
||||
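A quick illustration of the bookkeeping SchemaSearchMap does, using a tiny stand-in for a real Relation (everything below is illustrative only):

class _FakeRelation(object):
    def __init__(self, database, schema):
        self.database = database
        self.schema = schema

    def information_schema_only(self):
        # stand-in for Relation.information_schema_only(); one key per database
        return self.database


search_map = SchemaSearchMap()
search_map.add(_FakeRelation('analytics', 'DBT_TEST'))
search_map.add(_FakeRelation('analytics', 'snapshots'))

# schemas are lowercased and grouped under their information_schema key
assert dict(search_map) == {'analytics': {'dbt_test', 'snapshots'}}
assert sorted(search_map.search()) == [
    ('analytics', 'dbt_test'),
    ('analytics', 'snapshots'),
]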
@six.add_metaclass(AdapterMeta)
|
||||
class BaseAdapter(object):
|
||||
"""The BaseAdapter provides an abstract base class for adapters.
|
||||
|
||||
Adapters must implement the following methods and macros. Some of the
|
||||
methods can be safely overridden as a noop, where it makes sense
|
||||
(transactions on databases that don't support them, for instance). Those
|
||||
methods are marked with a (passable) in their docstrings. Check docstrings
|
||||
for type information, etc.
|
||||
|
||||
To implement a macro, implement "${adapter_type}__${macro_name}" in the
|
||||
adapter's internal project.
|
||||
|
||||
Methods:
|
||||
- exception_handler
|
||||
- date_function
|
||||
- list_schemas
|
||||
- drop_relation
|
||||
- truncate_relation
|
||||
- rename_relation
|
||||
- get_columns_in_relation
|
||||
- expand_column_types
|
||||
- list_relations_without_caching
|
||||
- is_cancelable
|
||||
- create_schema
|
||||
- drop_schema
|
||||
- quote
|
||||
- convert_text_type
|
||||
- convert_number_type
|
||||
- convert_boolean_type
|
||||
- convert_datetime_type
|
||||
- convert_date_type
|
||||
- convert_time_type
|
||||
|
||||
Macros:
|
||||
- get_catalog
|
||||
"""
|
||||
requires = {}
|
||||
|
||||
Relation = BaseRelation
|
||||
Column = Column
|
||||
# This should be an implementation of BaseConnectionManager
|
||||
ConnectionManager = None
|
||||
|
||||
# A set of clobber config fields accepted by this adapter
|
||||
# for use in materializations
|
||||
AdapterSpecificConfigs = frozenset()
|
||||
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.cache = RelationsCache()
|
||||
self.connections = self.ConnectionManager(config)
|
||||
self._internal_manifest_lazy = None
|
||||
|
||||
###
|
||||
# Methods that pass through to the connection manager
|
||||
###
|
||||
def acquire_connection(self, name=None):
|
||||
return self.connections.set_connection_name(name)
|
||||
|
||||
def release_connection(self):
|
||||
return self.connections.release()
|
||||
|
||||
def cleanup_connections(self):
|
||||
return self.connections.cleanup_all()
|
||||
|
||||
def clear_transaction(self):
|
||||
self.connections.clear_transaction()
|
||||
|
||||
def commit_if_has_connection(self):
|
||||
return self.connections.commit_if_has_connection()
|
||||
|
||||
def nice_connection_name(self):
|
||||
conn = self.connections.get_thread_connection()
|
||||
if conn is None or conn.name is None:
|
||||
return '<None>'
|
||||
return conn.name
|
||||
|
||||
@contextmanager
|
||||
def connection_named(self, name):
|
||||
try:
|
||||
yield self.acquire_connection(name)
|
||||
finally:
|
||||
self.release_connection()
|
||||
|
||||
@available.parse(lambda *a, **k: ('', dbt.clients.agate_helper.empty_table()))
|
||||
def execute(self, sql, auto_begin=False, fetch=False):
|
||||
"""Execute the given SQL. This is a thin wrapper around
|
||||
ConnectionManager.execute.
|
||||
|
||||
:param str sql: The sql to execute.
|
||||
:param bool auto_begin: If set, and dbt is not currently inside a
|
||||
transaction, automatically begin one.
|
||||
:param bool fetch: If set, fetch results.
|
||||
:return: A tuple of the status and the results (empty if fetch=False).
|
||||
:rtype: Tuple[str, agate.Table]
|
||||
"""
|
||||
return self.connections.execute(
|
||||
sql=sql,
|
||||
auto_begin=auto_begin,
|
||||
fetch=fetch
|
||||
)
|
||||
|
||||
###
|
||||
# Methods that should never be overridden
|
||||
###
|
||||
@classmethod
|
||||
def type(cls):
|
||||
"""Get the type of this adapter. Types must be class-unique and
|
||||
consistent.
|
||||
|
||||
:return: The type name
|
||||
:rtype: str
|
||||
"""
|
||||
return cls.ConnectionManager.TYPE
|
||||
|
||||
@property
|
||||
def _internal_manifest(self):
|
||||
if self._internal_manifest_lazy is None:
|
||||
manifest = GraphLoader.load_internal(self.config)
|
||||
self._internal_manifest_lazy = manifest
|
||||
return self._internal_manifest_lazy
|
||||
|
||||
def check_internal_manifest(self):
|
||||
"""Return the internal manifest (used for executing macros) if it's
|
||||
been initialized, otherwise return None.
|
||||
"""
|
||||
return self._internal_manifest_lazy
|
||||
|
||||
###
|
||||
# Caching methods
|
||||
###
|
||||
def _schema_is_cached(self, database, schema):
|
||||
"""Check if the schema is cached, and by default logs if it is not."""
|
||||
|
||||
if dbt.flags.USE_CACHE is False:
|
||||
return False
|
||||
elif (database, schema) not in self.cache:
|
||||
logger.debug(
|
||||
'On "{}": cache miss for schema "{}.{}", this is inefficient'
|
||||
.format(self.nice_connection_name(), database, schema)
|
||||
)
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def _relations_filter_table(cls, table, schemas):
|
||||
"""Filter the table as appropriate for relations table entries.
|
||||
Subclasses can override this to change filtering rules on a per-adapter
|
||||
basis.
|
||||
"""
|
||||
return table.where(_relations_filter_schemas(schemas))
|
||||
|
||||
def _get_cache_schemas(self, manifest, exec_only=False):
|
||||
"""Get a mapping of each node's "information_schema" relations to a
|
||||
set of all schemas expected in that information_schema.
|
||||
|
||||
There may be keys that are technically duplicates on the database side,
|
||||
for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
|
||||
databases, and values could overlap as appropriate. All values are
|
||||
lowercase strings.
|
||||
"""
|
||||
info_schema_name_map = SchemaSearchMap()
|
||||
for node in manifest.nodes.values():
|
||||
if exec_only and node.resource_type not in NodeType.executable():
|
||||
continue
|
||||
relation = self.Relation.create_from(self.config, node)
|
||||
info_schema_name_map.add(relation)
|
||||
# result is a map whose keys are information_schema Relations without
|
||||
# identifiers that have appropriate database prefixes, and whose values
|
||||
# are sets of lowercase schema names that are valid members of those
|
||||
# schemas
|
||||
return info_schema_name_map
|
||||
|
||||
def _relations_cache_for_schemas(self, manifest):
|
||||
"""Populate the relations cache for the given schemas. Returns an
|
||||
iterable of the schemas populated, as strings.
|
||||
"""
|
||||
if not dbt.flags.USE_CACHE:
|
||||
return
|
||||
|
||||
info_schema_name_map = self._get_cache_schemas(manifest,
|
||||
exec_only=True)
|
||||
for db, schema in info_schema_name_map.search():
|
||||
for relation in self.list_relations_without_caching(db, schema):
|
||||
self.cache.add(relation)
|
||||
|
||||
# it's possible that there were no relations in some schemas. We want
|
||||
# to insert the schemas we query into the cache's `.schemas` attribute
|
||||
# so we can check it later
|
||||
self.cache.update_schemas(info_schema_name_map.schemas_searched())
|
||||
|
||||
def set_relations_cache(self, manifest, clear=False):
|
||||
"""Run a query that gets a populated cache of the relations in the
|
||||
database and set the cache on this adapter.
|
||||
"""
|
||||
if not dbt.flags.USE_CACHE:
|
||||
return
|
||||
|
||||
with self.cache.lock:
|
||||
if clear:
|
||||
self.cache.clear()
|
||||
self._relations_cache_for_schemas(manifest)
|
||||
|
||||
def cache_new_relation(self, relation):
|
||||
"""Cache a new relation in dbt. It will show up in `list relations`."""
|
||||
if relation is None:
|
||||
name = self.nice_connection_name()
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
'Attempted to cache a null relation for {}'.format(name)
|
||||
)
|
||||
if dbt.flags.USE_CACHE:
|
||||
self.cache.add(relation)
|
||||
# so jinja doesn't render things
|
||||
return ''
|
||||
|
||||
###
|
||||
# Abstract methods for database-specific values, attributes, and types
|
||||
###
|
||||
@abstractclassmethod
|
||||
def date_function(cls):
|
||||
"""Get the date function used by this adapter's database.
|
||||
|
||||
:return: The date function
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`date_function` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def is_cancelable(cls):
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`is_cancelable` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
###
|
||||
# Abstract methods about schemas
|
||||
###
|
||||
@abc.abstractmethod
|
||||
def list_schemas(self, database):
|
||||
"""Get a list of existing schemas.
|
||||
|
||||
:param str database: The name of the database to list under.
|
||||
:return: All schemas that currently exist in the database
|
||||
:rtype: List[str]
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`list_schemas` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@available.parse(lambda *a, **k: False)
|
||||
def check_schema_exists(self, database, schema):
|
||||
"""Check if a schema exists.
|
||||
|
||||
The default implementation of this is potentially unnecessarily slow,
|
||||
and adapters should implement it if there is an optimized path (and
|
||||
there probably is)
|
||||
"""
|
||||
search = (
|
||||
s.lower() for s in
|
||||
self.list_schemas(database=database)
|
||||
)
|
||||
return schema.lower() in search
|
||||
|
||||
###
|
||||
# Abstract methods about relations
|
||||
###
|
||||
@abc.abstractmethod
|
||||
@available.parse_none
|
||||
def drop_relation(self, relation):
|
||||
"""Drop the given relation.
|
||||
|
||||
*Implementors must call self.cache.drop() to preserve cache state!*
|
||||
|
||||
:param self.Relation relation: The relation to drop
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`drop_relation` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
@available.parse_none
|
||||
def truncate_relation(self, relation):
|
||||
"""Truncate the given relation.
|
||||
|
||||
:param self.Relation relation: The relation to truncate
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`truncate_relation` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
@available.parse_none
|
||||
def rename_relation(self, from_relation, to_relation):
|
||||
"""Rename the relation from from_relation to to_relation.
|
||||
|
||||
Implementors must call self.cache.rename() to preserve cache state.
|
||||
|
||||
:param self.Relation from_relation: The original relation name
|
||||
:param self.Relation to_relation: The new relation name
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`rename_relation` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
@available.parse_list
|
||||
def get_columns_in_relation(self, relation):
|
||||
"""Get a list of the columns in the given Relation.
|
||||
|
||||
:param self.Relation relation: The relation to query for.
|
||||
:return: Information about all columns in the given relation.
|
||||
:rtype: List[self.Column]
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`get_columns_in_relation` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@available.deprecated('get_columns_in_relation', lambda *a, **k: [])
|
||||
def get_columns_in_table(self, schema, identifier):
|
||||
"""DEPRECATED: Get a list of the columns in the given table."""
|
||||
relation = self.Relation.create(
|
||||
database=self.config.credentials.database,
|
||||
schema=schema,
|
||||
identifier=identifier,
|
||||
quote_policy=self.config.quoting
|
||||
)
|
||||
return self.get_columns_in_relation(relation)
|
||||
|
||||
@abc.abstractmethod
|
||||
def expand_column_types(self, goal, current):
|
||||
"""Expand the current table's types to match the goal table. (passable)
|
||||
|
||||
:param self.Relation goal: A relation that currently exists in the
|
||||
database with columns of the desired types.
|
||||
:param self.Relation current: A relation that currently exists in the
|
||||
database with columns of unspecified types.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`expand_target_column_types` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
def list_relations_without_caching(self, information_schema, schema):
|
||||
"""List relations in the given schema, bypassing the cache.
|
||||
|
||||
This is used as the underlying behavior to fill the cache.
|
||||
|
||||
:param Relation information_schema: The information schema to list
|
||||
relations from.
|
||||
:param str schema: The name of the schema to list relations from.
|
||||
:return: The relations in schema
|
||||
:rtype: List[self.Relation]
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`list_relations_without_caching` is not implemented for this '
|
||||
'adapter!'
|
||||
)
|
||||
|
||||
###
|
||||
# Provided methods about relations
|
||||
###
|
||||
@available.parse_list
|
||||
def get_missing_columns(self, from_relation, to_relation):
|
||||
"""Returns a list of Columns in from_relation that are missing from
|
||||
to_relation.
|
||||
|
||||
:param Relation from_relation: The relation that might have extra
|
||||
columns
|
||||
:param Relation to_relation: The relation that might have columns
|
||||
missing
|
||||
:return: The columns in from_relation that are missing from to_relation
|
||||
:rtype: List[self.Column]
|
||||
"""
|
||||
if not isinstance(from_relation, self.Relation):
|
||||
dbt.exceptions.invalid_type_error(
|
||||
method_name='get_missing_columns',
|
||||
arg_name='from_relation',
|
||||
got_value=from_relation,
|
||||
expected_type=self.Relation)
|
||||
|
||||
if not isinstance(to_relation, self.Relation):
|
||||
dbt.exceptions.invalid_type_error(
|
||||
method_name='get_missing_columns',
|
||||
arg_name='to_relation',
|
||||
got_value=to_relation,
|
||||
expected_type=self.Relation)
|
||||
|
||||
from_columns = {
|
||||
col.name: col for col in
|
||||
self.get_columns_in_relation(from_relation)
|
||||
}
|
||||
|
||||
to_columns = {
|
||||
col.name: col for col in
|
||||
self.get_columns_in_relation(to_relation)
|
||||
}
|
||||
|
||||
missing_columns = set(from_columns.keys()) - set(to_columns.keys())
|
||||
|
||||
return [
|
||||
col for (col_name, col) in from_columns.items()
|
||||
if col_name in missing_columns
|
||||
]
|
||||
|
||||
@available.parse_none
|
||||
def valid_snapshot_target(self, relation):
|
||||
"""Ensure that the target relation is valid, by making sure it has the
|
||||
expected columns.
|
||||
|
||||
:param Relation relation: The relation to check
|
||||
:raises dbt.exceptions.CompilationException: If the columns are
|
||||
incorrect.
|
||||
"""
|
||||
if not isinstance(relation, self.Relation):
|
||||
dbt.exceptions.invalid_type_error(
|
||||
method_name='valid_snapshot_target',
|
||||
arg_name='relation',
|
||||
got_value=relation,
|
||||
expected_type=self.Relation)
|
||||
|
||||
columns = self.get_columns_in_relation(relation)
|
||||
names = set(c.name.lower() for c in columns)
|
||||
expanded_keys = ('scd_id', 'valid_from', 'valid_to')
|
||||
extra = []
|
||||
missing = []
|
||||
for legacy in expanded_keys:
|
||||
desired = 'dbt_' + legacy
|
||||
if desired not in names:
|
||||
missing.append(desired)
|
||||
if legacy in names:
|
||||
extra.append(legacy)
|
||||
|
||||
if missing:
|
||||
if extra:
|
||||
msg = (
|
||||
'Snapshot target has ("{}") but not ("{}") - is it an '
|
||||
'unmigrated previous version archive?'
|
||||
.format('", "'.join(extra), '", "'.join(missing))
|
||||
)
|
||||
else:
|
||||
msg = (
|
||||
'Snapshot target is not a snapshot table (missing "{}")'
|
||||
.format('", "'.join(missing))
|
||||
)
|
||||
dbt.exceptions.raise_compiler_error(msg)
|
||||
|
||||
@available.parse_none
|
||||
def expand_target_column_types(self, from_relation, to_relation):
|
||||
if not isinstance(from_relation, self.Relation):
|
||||
dbt.exceptions.invalid_type_error(
|
||||
method_name='expand_target_column_types',
|
||||
arg_name='from_relation',
|
||||
got_value=from_relation,
|
||||
expected_type=self.Relation)
|
||||
|
||||
if not isinstance(to_relation, self.Relation):
|
||||
dbt.exceptions.invalid_type_error(
|
||||
method_name='expand_target_column_types',
|
||||
arg_name='to_relation',
|
||||
got_value=to_relation,
|
||||
expected_type=self.Relation)
|
||||
|
||||
self.expand_column_types(from_relation, to_relation)
|
||||
|
||||
def list_relations(self, database, schema):
|
||||
if self._schema_is_cached(database, schema):
|
||||
return self.cache.get_relations(database, schema)
|
||||
|
||||
information_schema = self.Relation.create(
|
||||
database=database,
|
||||
schema=schema,
|
||||
model_name='',
|
||||
quote_policy=self.config.quoting
|
||||
).information_schema()
|
||||
|
||||
# we can't build the relations cache because we don't have a
|
||||
# manifest so we can't run any operations.
|
||||
relations = self.list_relations_without_caching(
|
||||
information_schema, schema
|
||||
)
|
||||
|
||||
logger.debug('with database={}, schema={}, relations={}'
|
||||
.format(database, schema, relations))
|
||||
return relations
|
||||
|
||||
def _make_match_kwargs(self, database, schema, identifier):
|
||||
quoting = self.config.quoting
|
||||
if identifier is not None and quoting['identifier'] is False:
|
||||
identifier = identifier.lower()
|
||||
|
||||
if schema is not None and quoting['schema'] is False:
|
||||
schema = schema.lower()
|
||||
|
||||
if database is not None and quoting['database'] is False:
|
||||
database = database.lower()
|
||||
|
||||
return filter_null_values({
|
||||
'database': database,
|
||||
'identifier': identifier,
|
||||
'schema': schema,
|
||||
})
|
||||
|
||||
def _make_match(self, relations_list, database, schema, identifier):
|
||||
|
||||
matches = []
|
||||
|
||||
search = self._make_match_kwargs(database, schema, identifier)
|
||||
|
||||
for relation in relations_list:
|
||||
if relation.matches(**search):
|
||||
matches.append(relation)
|
||||
|
||||
return matches
|
||||
|
||||
@available.parse_none
|
||||
def get_relation(self, database, schema, identifier):
|
||||
relations_list = self.list_relations(database, schema)
|
||||
|
||||
matches = self._make_match(relations_list, database, schema,
|
||||
identifier)
|
||||
|
||||
if len(matches) > 1:
|
||||
kwargs = {
|
||||
'identifier': identifier,
|
||||
'schema': schema,
|
||||
'database': database,
|
||||
}
|
||||
dbt.exceptions.get_relation_returned_multiple_results(
|
||||
kwargs, matches
|
||||
)
|
||||
|
||||
elif matches:
|
||||
return matches[0]
|
||||
|
||||
return None
|
||||
|
||||
@available.deprecated('get_relation', lambda *a, **k: False)
|
||||
def already_exists(self, schema, name):
|
||||
"""DEPRECATED: Return if a model already exists in the database"""
|
||||
database = self.config.credentials.database
|
||||
relation = self.get_relation(database, schema, name)
|
||||
return relation is not None
|
||||
|
||||
###
|
||||
# ODBC FUNCTIONS -- these should not need to change for every adapter,
|
||||
# although some adapters may override them
|
||||
###
|
||||
@abc.abstractmethod
|
||||
@available.parse_none
|
||||
def create_schema(self, database, schema):
|
||||
"""Create the given schema if it does not exist.
|
||||
|
||||
:param str schema: The schema name to create.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`create_schema` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
def drop_schema(self, database, schema):
|
||||
"""Drop the given schema (and everything in it) if it exists.
|
||||
|
||||
:param str schema: The schema name to drop.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`drop_schema` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@available
|
||||
@abstractclassmethod
|
||||
def quote(cls, identifier):
|
||||
"""Quote the given identifier, as appropriate for the database.
|
||||
|
||||
:param str identifier: The identifier to quote
|
||||
:return: The quoted identifier
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`quote` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@available
|
||||
def quote_as_configured(self, identifier, quote_key):
"""Quote or do not quote the given identifier as configured in the
|
||||
project config for the quote key.
|
||||
|
||||
The quote key should be one of 'database' (on bigquery, 'profile'),
|
||||
'identifier', or 'schema', or it will be treated as if you set `True`.
|
||||
"""
|
||||
default = self.Relation.DEFAULTS['quote_policy'].get(quote_key)
|
||||
if self.config.quoting.get(quote_key, default):
|
||||
return self.quote(identifier)
|
||||
else:
|
||||
return identifier
|
||||
|
||||
###
|
||||
# Conversions: These must be implemented by concrete implementations, for
|
||||
# converting agate types into their sql equivalents.
|
||||
###
|
||||
@abstractclassmethod
|
||||
def convert_text_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the agate.Text
|
||||
type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_text_type` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def convert_number_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the agate.Number
|
||||
type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_number_type` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def convert_boolean_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the agate.Boolean
|
||||
type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_boolean_type` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def convert_datetime_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the agate.DateTime
|
||||
type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_datetime_type` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def convert_date_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the agate.Date
|
||||
type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_date_type` is not implemented for this adapter!')
|
||||
|
||||
@abstractclassmethod
|
||||
def convert_time_type(cls, agate_table, col_idx):
|
||||
"""Return the type in the database that best maps to the
|
||||
agate.TimeDelta type for the given agate table and column index.
|
||||
|
||||
:param agate.Table agate_table: The table
|
||||
:param int col_idx: The index into the agate table for the column.
|
||||
:return: The name of the type in the database
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`convert_time_type` is not implemented for this adapter!')
|
||||
|
||||
@available
|
||||
@classmethod
|
||||
def convert_type(cls, agate_table, col_idx):
|
||||
return cls.convert_agate_type(agate_table, col_idx)
|
||||
|
||||
@classmethod
|
||||
def convert_agate_type(cls, agate_table, col_idx):
|
||||
agate_type = agate_table.column_types[col_idx]
|
||||
conversions = [
|
||||
(agate.Text, cls.convert_text_type),
|
||||
(agate.Number, cls.convert_number_type),
|
||||
(agate.Boolean, cls.convert_boolean_type),
|
||||
(agate.DateTime, cls.convert_datetime_type),
|
||||
(agate.Date, cls.convert_date_type),
|
||||
(agate.TimeDelta, cls.convert_time_type),
|
||||
]
|
||||
for agate_cls, func in conversions:
|
||||
if isinstance(agate_type, agate_cls):
|
||||
return func(agate_table, col_idx)
|
||||
|
||||
###
|
||||
# Operations involving the manifest
|
||||
###
|
||||
def execute_macro(self, macro_name, manifest=None, project=None,
|
||||
context_override=None, kwargs=None, release=False):
|
||||
"""Look macro_name up in the manifest and execute its results.
|
||||
|
||||
:param str macro_name: The name of the macro to execute.
|
||||
:param Optional[Manifest] manifest: The manifest to use for generating
|
||||
the base macro execution context. If none is provided, use the
|
||||
internal manifest.
|
||||
:param Optional[str] project: The name of the project to search in, or
|
||||
None for the first match.
|
||||
:param Optional[dict] context_override: An optional dict to update()
|
||||
the macro execution context.
|
||||
:param Optional[dict] kwargs: An optional dict of keyword args used to
|
||||
pass to the macro.
|
||||
:param bool release: If True, release the connection after executing.
|
||||
|
||||
Return an AttrDict with three attributes: 'table', 'data', and
|
||||
'status'. 'table' is an agate.Table.
|
||||
"""
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
if context_override is None:
|
||||
context_override = {}
|
||||
|
||||
if manifest is None:
|
||||
manifest = self._internal_manifest
|
||||
|
||||
macro = manifest.find_macro_by_name(macro_name, project)
|
||||
if macro is None:
|
||||
if project is None:
|
||||
package_name = 'any package'
|
||||
else:
|
||||
package_name = 'the "{}" package'.format(project)
|
||||
|
||||
# The import of dbt.context.runtime below shadows 'dbt'
|
||||
import dbt.exceptions
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
'dbt could not find a macro with the name "{}" in {}'
|
||||
.format(macro_name, package_name)
|
||||
)
|
||||
# This causes a reference cycle, as dbt.context.runtime.generate()
|
||||
# ends up calling get_adapter, so the import has to be here.
|
||||
import dbt.context.operation
|
||||
macro_context = dbt.context.operation.generate(
|
||||
macro,
|
||||
self.config,
|
||||
manifest
|
||||
)
|
||||
macro_context.update(context_override)
|
||||
|
||||
macro_function = macro.generator(macro_context)
|
||||
|
||||
try:
|
||||
result = macro_function(**kwargs)
|
||||
finally:
|
||||
if release:
|
||||
self.release_connection()
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def _catalog_filter_table(cls, table, manifest):
|
||||
"""Filter the table as appropriate for catalog entries. Subclasses can
|
||||
override this to change filtering rules on a per-adapter basis.
|
||||
"""
|
||||
return table.where(_catalog_filter_schemas(manifest))
|
||||
|
||||
def get_catalog(self, manifest):
|
||||
"""Get the catalog for this manifest by running the get catalog macro.
|
||||
Returns an agate.Table of catalog information.
|
||||
"""
|
||||
information_schemas = list(self._get_cache_schemas(manifest).keys())
|
||||
# make it a list so macros can index into it.
|
||||
kwargs = {'information_schemas': information_schemas}
|
||||
table = self.execute_macro(GET_CATALOG_MACRO_NAME,
|
||||
kwargs=kwargs,
|
||||
release=True)
|
||||
|
||||
results = self._catalog_filter_table(table, manifest)
|
||||
return results
|
||||
|
||||
def cancel_open_connections(self):
|
||||
"""Cancel all open connections."""
|
||||
return self.connections.cancel_open()
|
||||
|
||||
def calculate_freshness(self, source, loaded_at_field, manifest=None):
|
||||
"""Calculate the freshness of sources in dbt, and return it"""
|
||||
# in the future `source` will be a Relation instead of a string
|
||||
kwargs = {
|
||||
'source': source,
|
||||
'loaded_at_field': loaded_at_field
|
||||
}
|
||||
|
||||
# run the macro
|
||||
table = self.execute_macro(
|
||||
FRESHNESS_MACRO_NAME,
|
||||
kwargs=kwargs,
|
||||
release=True,
|
||||
manifest=manifest
|
||||
)
|
||||
# now we have a 1-row table of the maximum `loaded_at_field` value and
|
||||
# the current time according to the db.
|
||||
if len(table) != 1 or len(table[0]) != 2:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
'Got an invalid result from "{}" macro: {}'.format(
|
||||
FRESHNESS_MACRO_NAME, [tuple(r) for r in table]
|
||||
)
|
||||
)
|
||||
|
||||
max_loaded_at = _utc(table[0][0], source, loaded_at_field)
|
||||
snapshotted_at = _utc(table[0][1], source, loaded_at_field)
|
||||
|
||||
age = (snapshotted_at - max_loaded_at).total_seconds()
|
||||
return {
|
||||
'max_loaded_at': max_loaded_at,
|
||||
'snapshotted_at': snapshotted_at,
|
||||
'age': age,
|
||||
}
|
||||
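The convert_* hooks above are what seed loading and agate handling rely on; convert_agate_type() dispatches on the agate column type. A hedged sketch of those hooks for a generic ANSI-ish database (the SQL type names are assumptions, and the numeric heuristic mirrors common practice rather than any particular adapter):

import agate

from dbt.adapters.base import BaseAdapter


class MyDBAdapter(BaseAdapter):
    # ConnectionManager, Relation, date_function, etc. are omitted here;
    # this sketch only covers the agate type conversions.

    @classmethod
    def convert_text_type(cls, agate_table, col_idx):
        return 'varchar(256)'

    @classmethod
    def convert_number_type(cls, agate_table, col_idx):
        decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
        return 'numeric(28,6)' if decimals else 'integer'

    @classmethod
    def convert_boolean_type(cls, agate_table, col_idx):
        return 'boolean'

    @classmethod
    def convert_datetime_type(cls, agate_table, col_idx):
        return 'timestamp without time zone'

    @classmethod
    def convert_date_type(cls, agate_table, col_idx):
        return 'date'

    @classmethod
    def convert_time_type(cls, agate_table, col_idx):
        return 'time'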
117 core/dbt/adapters/base/meta.py Normal file
@@ -0,0 +1,117 @@
import abc
|
||||
from functools import wraps
|
||||
from dbt.deprecations import warn, renamed_method
|
||||
|
||||
|
||||
def _always_none(*args, **kwargs):
|
||||
return None
|
||||
|
||||
|
||||
def _always_list(*args, **kwargs):
|
||||
return []
|
||||
|
||||
|
||||
def available(func):
|
||||
"""A decorator to indicate that a method on the adapter will be
|
||||
exposed to the database wrapper, and will be available at parse and run
|
||||
time.
|
||||
"""
|
||||
func._is_available_ = True
|
||||
return func
|
||||
|
||||
|
||||
def available_deprecated(supported_name, parse_replacement=None):
|
||||
"""A decorator that marks a function as available, but also prints a
|
||||
deprecation warning. Use like
|
||||
|
||||
@available_deprecated('my_new_method')
|
||||
def my_old_method(self, arg):
|
||||
args = compatibility_shim(arg)
|
||||
return self.my_new_method(*args)
|
||||
|
||||
@available_deprecated('my_new_slow_method', lambda *a, **k: (0, ''))
|
||||
def my_old_slow_method(self, arg):
|
||||
args = compatibility_shim(arg)
|
||||
return self.my_new_slow_method(*args)
|
||||
|
||||
To make `adapter.my_old_method` available but also print out a warning on
|
||||
use directing users to `my_new_method`.
|
||||
|
||||
The optional parse_replacement, if provided, will provide a parse-time
|
||||
replacement for the actual method (see `available_parse`).
|
||||
"""
|
||||
def wrapper(func):
|
||||
func_name = func.__name__
|
||||
renamed_method(func_name, supported_name)
|
||||
|
||||
@wraps(func)
|
||||
def inner(*args, **kwargs):
|
||||
warn('adapter:{}'.format(func_name))
|
||||
return func(*args, **kwargs)
|
||||
|
||||
if parse_replacement:
|
||||
available = available_parse(parse_replacement)
|
||||
return available(inner)
|
||||
return wrapper
|
||||
|
||||
|
||||
def available_parse(parse_replacement):
|
||||
"""A decorator factory to indicate that a method on the adapter will be
|
||||
exposed to the database wrapper, and will be stubbed out at parse time with
|
||||
the given function.
|
||||
|
||||
@available_parse()
|
||||
def my_method(self, a, b):
|
||||
if something:
|
||||
return None
|
||||
return big_expensive_db_query()
|
||||
|
||||
@available_parse(lambda *args, **kwargs: {})
|
||||
def my_other_method(self, a, b):
|
||||
x = {}
|
||||
x.update(big_expensive_db_query())
|
||||
return x
|
||||
"""
|
||||
def inner(func):
|
||||
func._parse_replacement_ = parse_replacement
|
||||
available(func)
|
||||
return func
|
||||
return inner
|
||||
|
||||
|
||||
available.deprecated = available_deprecated
|
||||
available.parse = available_parse
|
||||
available.parse_none = available_parse(lambda *a, **k: None)
|
||||
available.parse_list = available_parse(lambda *a, **k: [])
|
||||
|
||||
|
||||
class AdapterMeta(abc.ABCMeta):
|
||||
def __new__(mcls, name, bases, namespace, **kwargs):
|
||||
cls = super(AdapterMeta, mcls).__new__(mcls, name, bases, namespace,
|
||||
**kwargs)
|
||||
|
||||
# this is very much inspired by ABCMeta's own implementation
|
||||
|
||||
# dict mapping the method name to whether the model name should be
|
||||
# injected into the arguments. All methods in here are exposed to the
|
||||
# context.
|
||||
available = set()
|
||||
replacements = {}
|
||||
|
||||
# collect base class data first
|
||||
for base in bases:
|
||||
available.update(getattr(base, '_available_', set()))
|
||||
replacements.update(getattr(base, '_parse_replacements_', set()))
|
||||
|
||||
# override with local data if it exists
|
||||
for name, value in namespace.items():
|
||||
if getattr(value, '_is_available_', False):
|
||||
available.add(name)
|
||||
parse_replacement = getattr(value, '_parse_replacement_', None)
|
||||
if parse_replacement is not None:
|
||||
replacements[name] = parse_replacement
|
||||
|
||||
cls._available_ = frozenset(available)
|
||||
# should this be a namedtuple so it will be immutable like _available_?
|
||||
cls._parse_replacements_ = replacements
|
||||
return cls
|
||||
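In practice these decorators are what expose adapter methods to the Jinja context: anything tagged with @available can be called as adapter.<method>() from models and macros, and a parse replacement keeps parsing from touching the database. A short sketch of methods as they might appear on an adapter (names and SQL are illustrative):

from dbt.adapters.base.meta import available


class MyDBAdapterMethods(object):
    # in a real adapter these live on a BaseAdapter subclass, whose AdapterMeta
    # metaclass collects the _is_available_ and _parse_replacement_ markers

    @available
    def quote_as_literal(self, value):
        # safe to run at both parse time and run time
        return "'{}'".format(str(value).replace("'", "''"))

    @available.parse(lambda *args, **kwargs: [])
    def list_partitions(self, relation):
        # at parse time the lambda above is substituted, so no query runs;
        # at run time this body executes for real
        _, table = self.execute(
            'show partitions in {}'.format(relation), fetch=True
        )
        return [row[0] for row in table]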
23 core/dbt/adapters/base/plugin.py Normal file
@@ -0,0 +1,23 @@
from dbt.config.project import Project


class AdapterPlugin(object):
    """Defines the basic requirements for a dbt adapter plugin.

    :param type adapter: An adapter class, derived from BaseAdapter
    :param type credentials: A credentials object, derived from Credentials
    :param str project_name: The name of this adapter plugin's associated dbt
        project.
    :param str include_path: The path to this adapter plugin's root
    :param Optional[List[str]] dependencies: A list of adapter names that this
        adapter depends upon.
    """
    def __init__(self, adapter, credentials, include_path, dependencies=None):
        self.adapter = adapter
        self.credentials = credentials
        self.include_path = include_path
        project = Project.from_project_root(include_path, {})
        self.project_name = project.project_name
        if dependencies is None:
            dependencies = []
        self.dependencies = dependencies
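An adapter package ties the pieces together by exporting an AdapterPlugin instance; a hedged sketch of what a plugin's top-level module might do, reusing the hypothetical MyDBAdapter and MyDBCredentials sketched earlier (paths and names are illustrative):

import os

from dbt.adapters.base import AdapterPlugin


PACKAGE_PATH = os.path.dirname(__file__)

Plugin = AdapterPlugin(
    adapter=MyDBAdapter,          # hypothetical BaseAdapter subclass
    credentials=MyDBCredentials,  # hypothetical Credentials subclass
    # must point at a directory containing a dbt_project.yml, since
    # AdapterPlugin loads it via Project.from_project_root()
    include_path=os.path.join(PACKAGE_PATH, 'include', 'mydb'),
    dependencies=['postgres'],    # only if this adapter reuses another's macros
)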
@@ -1,36 +1,42 @@
|
||||
from dbt.api import APIObject
|
||||
from dbt.utils import filter_null_values
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
import dbt.exceptions
|
||||
|
||||
|
||||
class DefaultRelation(APIObject):
|
||||
class BaseRelation(APIObject):
|
||||
|
||||
Table = "table"
|
||||
View = "view"
|
||||
CTE = "cte"
|
||||
MaterializedView = "materializedview"
|
||||
ExternalTable = "externaltable"
|
||||
|
||||
RelationTypes = [
|
||||
Table,
|
||||
View,
|
||||
CTE
|
||||
CTE,
|
||||
MaterializedView,
|
||||
ExternalTable
|
||||
]
|
||||
|
||||
DEFAULTS = {
|
||||
'metadata': {
|
||||
'type': 'DefaultRelation'
|
||||
'type': 'BaseRelation'
|
||||
},
|
||||
'quote_character': '"',
|
||||
'quote_policy': {
|
||||
'database': True,
|
||||
'schema': True,
|
||||
'identifier': True
|
||||
'identifier': True,
|
||||
},
|
||||
'include_policy': {
|
||||
'database': False,
|
||||
'database': True,
|
||||
'schema': True,
|
||||
'identifier': True
|
||||
}
|
||||
'identifier': True,
|
||||
},
|
||||
'dbt_created': False,
|
||||
}
|
||||
|
||||
PATH_SCHEMA = {
|
||||
@@ -38,7 +44,7 @@ class DefaultRelation(APIObject):
|
||||
'properties': {
|
||||
'database': {'type': ['string', 'null']},
|
||||
'schema': {'type': ['string', 'null']},
|
||||
'identifier': {'type': 'string'},
|
||||
'identifier': {'type': ['string', 'null']},
|
||||
},
|
||||
'required': ['database', 'schema', 'identifier'],
|
||||
}
|
||||
@@ -61,7 +67,7 @@ class DefaultRelation(APIObject):
|
||||
'properties': {
|
||||
'type': {
|
||||
'type': 'string',
|
||||
'const': 'DefaultRelation',
|
||||
'const': 'BaseRelation',
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -72,13 +78,20 @@ class DefaultRelation(APIObject):
|
||||
'include_policy': POLICY_SCHEMA,
|
||||
'quote_policy': POLICY_SCHEMA,
|
||||
'quote_character': {'type': 'string'},
|
||||
'dbt_created': {'type': 'boolean'},
|
||||
},
|
||||
'required': ['metadata', 'type', 'path', 'include_policy',
|
||||
'quote_policy', 'quote_character']
|
||||
'quote_policy', 'quote_character', 'dbt_created']
|
||||
}
|
||||
|
||||
PATH_ELEMENTS = ['database', 'schema', 'identifier']
|
||||
|
||||
def _is_exactish_match(self, field, value):
|
||||
if self.dbt_created and self.quote_policy.get(field) is False:
|
||||
return self.get_path_part(field).lower() == value.lower()
|
||||
else:
|
||||
return self.get_path_part(field) == value
|
||||
|
||||
def matches(self, database=None, schema=None, identifier=None):
|
||||
search = filter_null_values({
|
||||
'database': database,
|
||||
@@ -95,7 +108,7 @@ class DefaultRelation(APIObject):
|
||||
approximate_match = True
|
||||
|
||||
for k, v in search.items():
|
||||
if self.get_path_part(k) != v:
|
||||
if not self._is_exactish_match(k, v):
|
||||
exact_match = False
|
||||
|
||||
if self.get_path_part(k).lower() != v.lower():
|
||||
@@ -103,7 +116,8 @@ class DefaultRelation(APIObject):
|
||||
|
||||
if approximate_match and not exact_match:
|
||||
target = self.create(
|
||||
database=database, schema=schema, identifier=identifier)
|
||||
database=database, schema=schema, identifier=identifier
|
||||
)
|
||||
dbt.exceptions.approximate_relation_match(target, self)
|
||||
|
||||
return exact_match
|
||||
@@ -135,6 +149,36 @@ class DefaultRelation(APIObject):
|
||||
|
||||
return self.incorporate(include_policy=policy)
|
||||
|
||||
def information_schema(self, identifier=None):
|
||||
include_db = self.database is not None
|
||||
include_policy = filter_null_values({
|
||||
'database': include_db,
|
||||
'schema': True,
|
||||
'identifier': identifier is not None
|
||||
})
|
||||
quote_policy = filter_null_values({
|
||||
'database': self.quote_policy['database'],
|
||||
'schema': False,
|
||||
'identifier': False,
|
||||
})
|
||||
|
||||
path_update = {
|
||||
'schema': 'information_schema',
|
||||
'identifier': identifier
|
||||
}
|
||||
|
||||
return self.incorporate(
|
||||
quote_policy=quote_policy,
|
||||
include_policy=include_policy,
|
||||
path=path_update,
|
||||
table_name=identifier)
|
||||
|
||||
def information_schema_only(self):
|
||||
return self.information_schema()
|
||||
|
||||
def information_schema_table(self, identifier):
|
||||
return self.information_schema(identifier)
|
||||
|
||||
def render(self, use_table_name=True):
|
||||
parts = []
|
||||
|
||||
@@ -173,15 +217,19 @@ class DefaultRelation(APIObject):
|
||||
identifier=identifier)
|
||||
|
||||
@classmethod
|
||||
def _create_from_node(cls, config, node, table_name, quote_policy,
|
||||
**kwargs):
|
||||
def create_from_source(cls, source, **kwargs):
|
||||
quote_policy = dbt.utils.deep_merge(
|
||||
cls.DEFAULTS['quote_policy'],
|
||||
source.quoting,
|
||||
kwargs.get('quote_policy', {})
|
||||
)
|
||||
return cls.create(
|
||||
database=config.credentials.dbname,
|
||||
schema=node.get('schema'),
|
||||
identifier=node.get('alias'),
|
||||
table_name=table_name,
|
||||
database=source.database,
|
||||
schema=source.schema,
|
||||
identifier=source.identifier,
|
||||
quote_policy=quote_policy,
|
||||
**kwargs)
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def create_from_node(cls, config, node, table_name=None, quote_policy=None,
|
||||
@@ -190,9 +238,21 @@ class DefaultRelation(APIObject):
|
||||
quote_policy = {}
|
||||
|
||||
quote_policy = dbt.utils.merge(config.quoting, quote_policy)
|
||||
return cls._create_from_node(config=config, quote_policy=quote_policy,
|
||||
node=node, table_name=table_name,
|
||||
**kwargs)
|
||||
|
||||
return cls.create(
|
||||
database=node.get('database'),
|
||||
schema=node.get('schema'),
|
||||
identifier=node.get('alias'),
|
||||
table_name=table_name,
|
||||
quote_policy=quote_policy,
|
||||
**kwargs)
|
||||
|
||||
@classmethod
|
||||
def create_from(cls, config, node, **kwargs):
|
||||
if node.resource_type == NodeType.Source:
|
||||
return cls.create_from_source(node, **kwargs)
|
||||
else:
|
||||
return cls.create_from_node(config, node, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def create(cls, database=None, schema=None,
|
||||
@@ -256,3 +316,91 @@ class DefaultRelation(APIObject):
|
||||
@property
|
||||
def is_view(self):
|
||||
return self.type == self.View
|
||||
|
||||
|
||||
class Column(object):
|
||||
TYPE_LABELS = {
|
||||
'STRING': 'TEXT',
|
||||
'TIMESTAMP': 'TIMESTAMP',
|
||||
'FLOAT': 'FLOAT',
|
||||
'INTEGER': 'INT'
|
||||
}
|
||||
|
||||
def __init__(self, column, dtype, char_size=None, numeric_precision=None,
|
||||
numeric_scale=None):
|
||||
self.column = column
|
||||
self.dtype = dtype
|
||||
self.char_size = char_size
|
||||
self.numeric_precision = numeric_precision
|
||||
self.numeric_scale = numeric_scale
|
||||
|
||||
@classmethod
|
||||
def translate_type(cls, dtype):
|
||||
return cls.TYPE_LABELS.get(dtype.upper(), dtype)
|
||||
|
||||
@classmethod
|
||||
def create(cls, name, label_or_dtype):
|
||||
column_type = cls.translate_type(label_or_dtype)
|
||||
return cls(name, column_type)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.column
|
||||
|
||||
@property
|
||||
def quoted(self):
|
||||
return '"{}"'.format(self.column)
|
||||
|
||||
@property
|
||||
def data_type(self):
|
||||
if self.is_string():
|
||||
return Column.string_type(self.string_size())
|
||||
elif self.is_numeric():
|
||||
return Column.numeric_type(self.dtype, self.numeric_precision,
|
||||
self.numeric_scale)
|
||||
else:
|
||||
return self.dtype
|
||||
|
||||
def is_string(self):
|
||||
return self.dtype.lower() in ['text', 'character varying', 'character',
|
||||
'varchar']
|
||||
|
||||
def is_numeric(self):
|
||||
return self.dtype.lower() in ['numeric', 'number']
|
||||
|
||||
def string_size(self):
|
||||
if not self.is_string():
|
||||
raise RuntimeError("Called string_size() on non-string field!")
|
||||
|
||||
if self.dtype == 'text' or self.char_size is None:
|
||||
# char_size should never be None. Handle it reasonably just in case
|
||||
return 256
|
||||
else:
|
||||
return int(self.char_size)
|
||||
|
||||
def can_expand_to(self, other_column):
|
||||
"""returns True if this column can be expanded to the size of the
|
||||
other column"""
|
||||
if not self.is_string() or not other_column.is_string():
|
||||
return False
|
||||
|
||||
return other_column.string_size() > self.string_size()
|
||||
|
||||
def literal(self, value):
|
||||
return "{}::{}".format(value, self.data_type)
|
||||
|
||||
@classmethod
|
||||
def string_type(cls, size):
|
||||
return "character varying({})".format(size)
|
||||
|
||||
@classmethod
|
||||
def numeric_type(cls, dtype, precision, scale):
|
||||
# This could be decimal(...), numeric(...), number(...)
|
||||
# Just use whatever was fed in here -- don't try to get too clever
|
||||
if precision is None or scale is None:
|
||||
return dtype
|
||||
else:
|
||||
return "{}({},{})".format(dtype, precision, scale)
|
||||
|
||||
def __repr__(self):
|
||||
return "<Column {} ({})>".format(self.name, self.data_type)
|
||||
@@ -1,12 +1,27 @@
|
||||
from collections import namedtuple
|
||||
import threading
|
||||
from copy import deepcopy
|
||||
import pprint
|
||||
from dbt.logger import CACHE_LOGGER as logger
|
||||
import dbt.exceptions
|
||||
|
||||
|
||||
_ReferenceKey = namedtuple('_ReferenceKey', 'schema identifier')
|
||||
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
|
||||
|
||||
|
||||
def _lower(value):
|
||||
"""Postgres schemas can be None so we can't just call lower()."""
|
||||
if value is None:
|
||||
return None
|
||||
return value.lower()
|
||||
|
||||
|
||||
def _make_key(relation):
|
||||
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
|
||||
to keep track of quoting
|
||||
"""
|
||||
return _ReferenceKey(_lower(relation.database),
|
||||
_lower(relation.schema),
|
||||
_lower(relation.identifier))
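For illustration, any object exposing database, schema and identifier attributes maps to a lower-cased key; the stand-in class below is hypothetical, not part of this diff.

# Illustrative stand-in for a relation-like object.
class _FakeRelation(object):
    def __init__(self, database, schema, identifier):
        self.database = database
        self.schema = schema
        self.identifier = identifier

key = _make_key(_FakeRelation('Analytics', 'PUBLIC', 'My_Model'))
# _ReferenceKey(database='analytics', schema='public', identifier='my_model')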
|
||||
|
||||
|
||||
def dot_separated(key):
|
||||
@@ -14,7 +29,7 @@ def dot_separated(key):
|
||||
|
||||
:param key _ReferenceKey: The key to stringify.
|
||||
"""
|
||||
return '.'.join(key)
|
||||
return '.'.join(map(str, key))
|
||||
|
||||
|
||||
class _CachedRelation(object):
|
||||
@@ -24,7 +39,7 @@ class _CachedRelation(object):
|
||||
:attr str identifier: The identifier of this relation.
|
||||
:attr Dict[_ReferenceKey, _CachedRelation] referenced_by: The relations
|
||||
that refer to this relation.
|
||||
:attr DefaultRelation inner: The underlying dbt relation.
|
||||
:attr BaseRelation inner: The underlying dbt relation.
|
||||
"""
|
||||
def __init__(self, inner):
|
||||
self.referenced_by = {}
|
||||
@@ -32,16 +47,20 @@ class _CachedRelation(object):
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
'_CachedRelation(schema={}, identifier={}, inner={})'
|
||||
).format(self.schema, self.identifier, self.inner)
|
||||
'_CachedRelation(database={}, schema={}, identifier={}, inner={})'
|
||||
).format(self.database, self.schema, self.identifier, self.inner)
|
||||
|
||||
@property
|
||||
def database(self):
|
||||
return _lower(self.inner.database)
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
return self.inner.schema
|
||||
return _lower(self.inner.schema)
|
||||
|
||||
@property
|
||||
def identifier(self):
|
||||
return self.inner.identifier
|
||||
return _lower(self.inner.identifier)
|
||||
|
||||
def __copy__(self):
|
||||
new = self.__class__(self.inner)
|
||||
@@ -61,7 +80,7 @@ class _CachedRelation(object):
|
||||
|
||||
:return _ReferenceKey: A key for this relation.
|
||||
"""
|
||||
return _ReferenceKey(self.schema, self.identifier)
|
||||
return _make_key(self)
|
||||
|
||||
def add_reference(self, referrer):
|
||||
"""Add a reference from referrer to self, indicating that if this node
|
||||
@@ -98,7 +117,7 @@ class _CachedRelation(object):
|
||||
Note that this will change the output of key(), all refs must be
|
||||
updated!
|
||||
|
||||
:param _ReferenceKey new_relation: The new name to apply to the
|
||||
:param _CachedRelation new_relation: The new name to apply to the
|
||||
relation
|
||||
"""
|
||||
# Relations store this stuff inside their `path` dict. But they
|
||||
@@ -107,10 +126,11 @@ class _CachedRelation(object):
|
||||
# table_name is ever anything but the identifier (via .create())
|
||||
self.inner = self.inner.incorporate(
|
||||
path={
|
||||
'schema': new_relation.schema,
|
||||
'identifier': new_relation.identifier
|
||||
'database': new_relation.inner.database,
|
||||
'schema': new_relation.inner.schema,
|
||||
'identifier': new_relation.inner.identifier
|
||||
},
|
||||
table_name=new_relation.identifier
|
||||
table_name=new_relation.inner.identifier
|
||||
)
|
||||
|
||||
def rename_key(self, old_key, new_key):
|
||||
@@ -142,6 +162,12 @@ class _CachedRelation(object):
|
||||
return [dot_separated(r) for r in self.referenced_by]
|
||||
|
||||
|
||||
def lazy_log(msg, func):
|
||||
if logger.disabled:
|
||||
return
|
||||
logger.debug(msg.format(func()))
|
||||
|
||||
|
||||
class RelationsCache(object):
|
||||
"""A cache of the relations known to dbt. Keeps track of relationships
|
||||
declared between tables and handles renames/drops as a real database would.
|
||||
@@ -156,27 +182,40 @@ class RelationsCache(object):
|
||||
self.lock = threading.RLock()
|
||||
self.schemas = set()
|
||||
|
||||
def add_schema(self, schema):
|
||||
def add_schema(self, database, schema):
|
||||
"""Add a schema to the set of known schemas (case-insensitive)
|
||||
|
||||
:param str database: The database name to add.
|
||||
:param str schema: The schema name to add.
|
||||
"""
|
||||
self.schemas.add(schema.lower())
|
||||
self.schemas.add((_lower(database), _lower(schema)))
|
||||
|
||||
def remove_schema(self, database, schema):
|
||||
"""Remove a schema from the set of known schemas (case-insensitive)
|
||||
|
||||
If the schema does not exist, it will be ignored - it could just be a
|
||||
temporary table.
|
||||
|
||||
:param str database: The database name to remove.
|
||||
:param str schema: The schema name to remove.
|
||||
"""
|
||||
self.schemas.discard((_lower(database), _lower(schema)))
|
||||
|
||||
def update_schemas(self, schemas):
|
||||
"""Add multiple schemas to the set of known schemas (case-insensitive)
|
||||
|
||||
:param Iterable[str] schemas: An iterable of the schema names to add.
|
||||
"""
|
||||
self.schemas.update(s.lower() for s in schemas)
|
||||
self.schemas.update((_lower(d), _lower(s)) for (d, s) in schemas)
|
||||
|
||||
def __contains__(self, schema):
|
||||
def __contains__(self, schema_id):
|
||||
"""A schema is 'in' the relations cache if it is in the set of cached
|
||||
schemas.
|
||||
|
||||
:param str schema: The schema name to look up.
|
||||
:param Tuple[str, str] schema: The db name and schema name to look up.
|
||||
"""
|
||||
return schema in self.schemas
|
||||
db, schema = schema_id
|
||||
return (_lower(db), _lower(schema)) in self.schemas
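In other words, membership is checked against (database, schema) pairs, case-insensitively. A quick sketch, assuming an already-constructed cache instance:

# Illustrative only: `cache` is an existing RelationsCache instance.
cache.add_schema('Analytics', 'Snapshots')
assert ('analytics', 'snapshots') in cache
assert ('ANALYTICS', 'SNAPSHOTS') in cache   # _lower() makes this hit too
assert ('analytics', 'staging') not in cache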
|
||||
|
||||
def dump_graph(self):
|
||||
"""Dump a key-only representation of the schema to a dictionary. Every
|
||||
@@ -199,7 +238,7 @@ class RelationsCache(object):
|
||||
:return _CachedRelation: The relation stored under the given relation's
|
||||
key
|
||||
"""
|
||||
self.schemas.add(relation.schema)
|
||||
self.add_schema(relation.database, relation.schema)
|
||||
key = relation.key()
|
||||
return self.relations.setdefault(key, relation)
|
||||
|
||||
@@ -239,28 +278,22 @@ class RelationsCache(object):
|
||||
to bar, so "drop bar cascade" will drop foo and all of foo's
|
||||
dependents.
|
||||
|
||||
:param DefaultRelation referenced: The referenced model.
|
||||
:param DefaultRelation dependent: The dependent model.
|
||||
:param BaseRelation referenced: The referenced model.
|
||||
:param BaseRelation dependent: The dependent model.
|
||||
:raises InternalError: If either entry does not exist.
|
||||
"""
|
||||
referenced = _ReferenceKey(
|
||||
schema=referenced.schema,
|
||||
identifier=referenced.name
|
||||
)
|
||||
if referenced.schema not in self:
|
||||
referenced = _make_key(referenced)
|
||||
if (referenced.database, referenced.schema) not in self:
|
||||
# if we have not cached the referenced schema at all, we must be
|
||||
# referring to a table outside our control. There's no need to make
|
||||
# a link - we will never drop the referenced relation during a run.
|
||||
logger.debug(
|
||||
'{dep!s} references {ref!s} but {ref.schema} is not in the '
|
||||
'cache, skipping assumed external relation'
|
||||
'{dep!s} references {ref!s} but {ref.database}.{ref.schema} '
|
||||
'is not in the cache, skipping assumed external relation'
|
||||
.format(dep=dependent, ref=referenced)
|
||||
)
|
||||
return
|
||||
dependent = _ReferenceKey(
|
||||
schema=dependent.schema,
|
||||
identifier=dependent.name
|
||||
)
|
||||
dependent = _make_key(dependent)
|
||||
logger.debug(
|
||||
'adding link, {!s} references {!s}'.format(dependent, referenced)
|
||||
)
|
||||
@@ -271,18 +304,17 @@ class RelationsCache(object):
|
||||
"""Add the relation inner to the cache, under the schema schema and
|
||||
identifier identifier
|
||||
|
||||
:param DefaultRelation relation: The underlying relation.
|
||||
:param BaseRelation relation: The underlying relation.
|
||||
"""
|
||||
cached = _CachedRelation(relation)
|
||||
logger.debug('Adding relation: {!s}'.format(cached))
|
||||
logger.debug('before adding: {}'.format(
|
||||
pprint.pformat(self.dump_graph()))
|
||||
)
|
||||
|
||||
lazy_log('before adding: {!s}', self.dump_graph)
|
||||
|
||||
with self.lock:
|
||||
self._setdefault(cached)
|
||||
logger.debug('after adding: {}'.format(
|
||||
pprint.pformat(self.dump_graph()))
|
||||
)
|
||||
|
||||
lazy_log('after adding: {!s}', self.dump_graph)
|
||||
|
||||
def _remove_refs(self, keys):
|
||||
"""Removes all references to all entries in keys. This does not
|
||||
@@ -324,42 +356,27 @@ class RelationsCache(object):
|
||||
:param str schema: The schema of the relation to drop.
|
||||
:param str identifier: The identifier of the relation to drop.
|
||||
"""
|
||||
dropped = _ReferenceKey(schema=relation.schema,
|
||||
identifier=relation.identifier)
|
||||
dropped = _make_key(relation)
|
||||
logger.debug('Dropping relation: {!s}'.format(dropped))
|
||||
with self.lock:
|
||||
self._drop_cascade_relation(dropped)
|
||||
|
||||
def _rename_relation(self, old_key, new_key):
|
||||
def _rename_relation(self, old_key, new_relation):
|
||||
"""Rename a relation named old_key to new_key, updating references.
|
||||
If the new key is already present, that is an error.
|
||||
If the old key is absent, we only debug log and return, assuming it's a
|
||||
temp table being renamed.
|
||||
Return whether or not there was a key to rename.
|
||||
|
||||
:param _ReferenceKey old_key: The existing key, to rename from.
|
||||
:param _ReferenceKey new_key: The new key, to rename to.
|
||||
:raises InternalError: If the new key is already present.
|
||||
:param _CachedRelation new_key: The new relation, to rename to.
|
||||
"""
|
||||
if old_key not in self.relations:
|
||||
logger.debug(
|
||||
'old key {} not found in self.relations, assuming temporary'
|
||||
.format(old_key)
|
||||
)
|
||||
return
|
||||
|
||||
if new_key in self.relations:
|
||||
dbt.exceptions.raise_cache_inconsistent(
|
||||
'in rename, new key {} already in cache: {}'
|
||||
.format(new_key, list(self.relations.keys()))
|
||||
)
|
||||
|
||||
# On the database level, a rename updates all values that were
|
||||
# previously referenced by old_name to be referenced by new_name.
|
||||
# basically, the name changes but some underlying ID moves. Kind of
|
||||
# like an object reference!
|
||||
relation = self.relations.pop(old_key)
|
||||
new_key = new_relation.key()
|
||||
|
||||
relation.rename(new_key)
|
||||
# relation has to rename its innards, so it needs the _CachedRelation.
|
||||
relation.rename(new_relation)
|
||||
# update all the relations that refer to it
|
||||
for cached in self.relations.values():
|
||||
if cached.is_referenced_by(old_key):
|
||||
@@ -370,6 +387,38 @@ class RelationsCache(object):
|
||||
cached.rename_key(old_key, new_key)
|
||||
|
||||
self.relations[new_key] = relation
|
||||
# also fixup the schemas!
|
||||
self.remove_schema(old_key.database, old_key.schema)
|
||||
self.add_schema(new_key.database, new_key.schema)
|
||||
|
||||
return True
|
||||
|
||||
def _check_rename_constraints(self, old_key, new_key):
|
||||
"""Check the rename constraints, and return whether or not the rename
|
||||
can proceed.
|
||||
|
||||
If the new key is already present, that is an error.
|
||||
If the old key is absent, we debug log and return False, assuming it's
|
||||
a temp table being renamed.
|
||||
|
||||
:param _ReferenceKey old_key: The existing key, to rename from.
|
||||
:param _ReferenceKey new_key: The new key, to rename to.
|
||||
:return bool: If the old relation exists for renaming.
|
||||
:raises InternalError: If the new key is already present.
|
||||
"""
|
||||
if new_key in self.relations:
|
||||
dbt.exceptions.raise_cache_inconsistent(
|
||||
'in rename, new key {} already in cache: {}'
|
||||
.format(new_key, list(self.relations.keys()))
|
||||
)
|
||||
|
||||
if old_key not in self.relations:
|
||||
logger.debug(
|
||||
'old key {} not found in self.relations, assuming temporary'
|
||||
.format(old_key)
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
def rename(self, old, new):
|
||||
"""Rename the old schema/identifier to the new schema/identifier and
|
||||
@@ -379,42 +428,39 @@ class RelationsCache(object):
|
||||
If the schema/identifier key is absent, we only debug log and return,
|
||||
assuming it's a temp table being renamed.
|
||||
|
||||
:param DefaultRelation old: The existing relation name information.
|
||||
:param DefaultRelation new: The new relation name information.
|
||||
:param BaseRelation old: The existing relation name information.
|
||||
:param BaseRelation new: The new relation name information.
|
||||
:raises InternalError: If the new key is already present.
|
||||
"""
|
||||
old_key = _ReferenceKey(
|
||||
schema=old.schema,
|
||||
identifier=old.identifier
|
||||
)
|
||||
new_key = _ReferenceKey(
|
||||
schema=new.schema,
|
||||
identifier=new.identifier
|
||||
)
|
||||
old_key = _make_key(old)
|
||||
new_key = _make_key(new)
|
||||
logger.debug('Renaming relation {!s} to {!s}'.format(
|
||||
old_key, new_key)
|
||||
)
|
||||
logger.debug('before rename: {}'.format(
|
||||
pprint.pformat(self.dump_graph()))
|
||||
)
|
||||
with self.lock:
|
||||
self._rename_relation(old_key, new_key)
|
||||
logger.debug('after rename: {}'.format(
|
||||
pprint.pformat(self.dump_graph()))
|
||||
)
|
||||
old_key, new_key
|
||||
))
|
||||
|
||||
def get_relations(self, schema):
|
||||
lazy_log('before rename: {!s}', self.dump_graph)
|
||||
|
||||
with self.lock:
|
||||
if self._check_rename_constraints(old_key, new_key):
|
||||
self._rename_relation(old_key, _CachedRelation(new))
|
||||
else:
|
||||
self._setdefault(_CachedRelation(new))
|
||||
|
||||
lazy_log('after rename: {!s}', self.dump_graph)
|
||||
|
||||
def get_relations(self, database, schema):
|
||||
"""Case-insensitively yield all relations matching the given schema.
|
||||
|
||||
:param str schema: The case-insensitive schema name to list from.
|
||||
:return List[DefaultRelation]: The list of relations with the given
|
||||
:return List[BaseRelation]: The list of relations with the given
|
||||
schema
|
||||
"""
|
||||
schema = schema.lower()
|
||||
schema = _lower(schema)
|
||||
with self.lock:
|
||||
results = [
|
||||
r.inner for r in self.relations.values()
|
||||
if r.schema.lower() == schema
|
||||
if (r.schema == _lower(schema) and
|
||||
r.database == _lower(database))
|
||||
]
|
||||
|
||||
if None in results:
|
||||
86
core/dbt/adapters/factory.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import dbt.exceptions
|
||||
from importlib import import_module
|
||||
from dbt.include.global_project import PACKAGES
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
|
||||
import threading
|
||||
|
||||
ADAPTER_TYPES = {}
|
||||
|
||||
_ADAPTERS = {}
|
||||
_ADAPTER_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def get_adapter_class_by_name(adapter_name):
|
||||
with _ADAPTER_LOCK:
|
||||
if adapter_name in ADAPTER_TYPES:
|
||||
return ADAPTER_TYPES[adapter_name]
|
||||
|
||||
message = "Invalid adapter type {}! Must be one of {}"
|
||||
adapter_names = ", ".join(ADAPTER_TYPES.keys())
|
||||
formatted_message = message.format(adapter_name, adapter_names)
|
||||
raise dbt.exceptions.RuntimeException(formatted_message)
|
||||
|
||||
|
||||
def get_relation_class_by_name(adapter_name):
|
||||
adapter = get_adapter_class_by_name(adapter_name)
|
||||
return adapter.Relation
|
||||
|
||||
|
||||
def load_plugin(adapter_name):
|
||||
try:
|
||||
mod = import_module('.' + adapter_name, 'dbt.adapters')
|
||||
except ImportError as e:
|
||||
logger.info("Error importing adapter: {}".format(e))
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
"Could not find adapter type {}!".format(adapter_name)
|
||||
)
|
||||
plugin = mod.Plugin
|
||||
|
||||
if plugin.adapter.type() != adapter_name:
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
'Expected to find adapter with type named {}, got adapter with '
|
||||
'type {}'
|
||||
.format(adapter_name, plugin.adapter.type())
|
||||
)
|
||||
|
||||
with _ADAPTER_LOCK:
|
||||
ADAPTER_TYPES[adapter_name] = plugin.adapter
|
||||
|
||||
PACKAGES[plugin.project_name] = plugin.include_path
|
||||
|
||||
for dep in plugin.dependencies:
|
||||
load_plugin(dep)
|
||||
|
||||
return plugin.credentials
|
||||
|
||||
|
||||
def get_adapter(config):
|
||||
adapter_name = config.credentials.type
|
||||
if adapter_name in _ADAPTERS:
|
||||
return _ADAPTERS[adapter_name]
|
||||
|
||||
with _ADAPTER_LOCK:
|
||||
if adapter_name not in ADAPTER_TYPES:
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
"Could not find adapter type {}!".format(adapter_name)
|
||||
)
|
||||
|
||||
adapter_type = ADAPTER_TYPES[adapter_name]
|
||||
|
||||
# check again, in case something was setting it before
|
||||
if adapter_name in _ADAPTERS:
|
||||
return _ADAPTERS[adapter_name]
|
||||
|
||||
adapter = adapter_type(config)
|
||||
_ADAPTERS[adapter_name] = adapter
|
||||
return adapter
|
||||
|
||||
|
||||
def reset_adapters():
|
||||
"""Clear the adapters. This is useful for tests, which change configs.
|
||||
"""
|
||||
with _ADAPTER_LOCK:
|
||||
for adapter in _ADAPTERS.values():
|
||||
adapter.cleanup_connections()
|
||||
_ADAPTERS.clear()
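A rough sketch of the plugin shape this factory expects, inferred only from the attribute accesses above (mod.Plugin.adapter, .credentials, .include_path, .project_name, .dependencies); the names below are hypothetical, not a documented contract.

# Hypothetical adapter plugin, matching what load_plugin() reads above.
class MyAdapter(object):
    @classmethod
    def type(cls):
        return 'myadapter'    # must equal the name passed to load_plugin()

    def __init__(self, config):
        self.config = config

    def cleanup_connections(self):
        pass                  # called by reset_adapters()


class MyCredentials(object):
    pass


class Plugin(object):
    adapter = MyAdapter
    credentials = MyCredentials
    include_path = '/path/to/dbt/include/myadapter'
    project_name = 'dbt_myadapter'
    dependencies = []         # other adapter plugin names to load first

With such a module importable as dbt.adapters.myadapter, load_plugin('myadapter') registers MyAdapter in ADAPTER_TYPES, and get_adapter(config) lazily instantiates and caches one adapter per credentials type.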
|
||||
3
core/dbt/adapters/sql/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
# these are all just exports, #noqa them so flake8 will be happy
|
||||
from dbt.adapters.sql.connections import SQLConnectionManager # noqa
|
||||
from dbt.adapters.sql.impl import SQLAdapter # noqa
|
||||
141
core/dbt/adapters/sql/connections.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import abc
|
||||
import time
|
||||
|
||||
import dbt.clients.agate_helper
|
||||
import dbt.exceptions
|
||||
from dbt.contracts.connection import Connection
|
||||
from dbt.adapters.base import BaseConnectionManager
|
||||
from dbt.compat import abstractclassmethod
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
|
||||
|
||||
class SQLConnectionManager(BaseConnectionManager):
|
||||
"""The default connection manager with some common SQL methods implemented.
|
||||
|
||||
Methods to implement:
|
||||
- exception_handler
|
||||
- cancel
|
||||
- get_status
|
||||
- open
|
||||
"""
|
||||
@abc.abstractmethod
|
||||
def cancel(self, connection):
|
||||
"""Cancel the given connection.
|
||||
|
||||
:param Connection connection: The connection to cancel.
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`cancel` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
def cancel_open(self):
|
||||
names = []
|
||||
this_connection = self.get_if_exists()
|
||||
with self.lock:
|
||||
for connection in self.thread_connections.values():
|
||||
if connection is this_connection:
|
||||
continue
|
||||
|
||||
self.cancel(connection)
|
||||
names.append(connection.name)
|
||||
return names
|
||||
|
||||
def add_query(self, sql, auto_begin=True, bindings=None,
|
||||
abridge_sql_log=False):
|
||||
connection = self.get_thread_connection()
|
||||
if auto_begin and connection.transaction_open is False:
|
||||
self.begin()
|
||||
|
||||
logger.debug('Using {} connection "{}".'
|
||||
.format(self.TYPE, connection.name))
|
||||
|
||||
with self.exception_handler(sql):
|
||||
if abridge_sql_log:
|
||||
logger.debug('On %s: %s....', connection.name, sql[0:512])
|
||||
else:
|
||||
logger.debug('On %s: %s', connection.name, sql)
|
||||
pre = time.time()
|
||||
|
||||
cursor = connection.handle.cursor()
|
||||
cursor.execute(sql, bindings)
|
||||
|
||||
logger.debug("SQL status: %s in %0.2f seconds",
|
||||
self.get_status(cursor), (time.time() - pre))
|
||||
|
||||
return connection, cursor
|
||||
|
||||
@abstractclassmethod
|
||||
def get_status(cls, cursor):
|
||||
"""Get the status of the cursor.
|
||||
|
||||
:param cursor: A database handle to get status from
|
||||
:return: The current status
|
||||
:rtype: str
|
||||
"""
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`get_status` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def process_results(cls, column_names, rows):
|
||||
return [dict(zip(column_names, row)) for row in rows]
|
||||
|
||||
@classmethod
|
||||
def get_result_from_cursor(cls, cursor):
|
||||
data = []
|
||||
column_names = []
|
||||
|
||||
if cursor.description is not None:
|
||||
column_names = [col[0] for col in cursor.description]
|
||||
rows = cursor.fetchall()
|
||||
data = cls.process_results(column_names, rows)
|
||||
|
||||
return dbt.clients.agate_helper.table_from_data(data, column_names)
|
||||
|
||||
def execute(self, sql, auto_begin=False, fetch=False):
|
||||
_, cursor = self.add_query(sql, auto_begin)
|
||||
status = self.get_status(cursor)
|
||||
if fetch:
|
||||
table = self.get_result_from_cursor(cursor)
|
||||
else:
|
||||
table = dbt.clients.agate_helper.empty_table()
|
||||
return status, table
|
||||
|
||||
def add_begin_query(self):
|
||||
return self.add_query('BEGIN', auto_begin=False)
|
||||
|
||||
def add_commit_query(self):
|
||||
return self.add_query('COMMIT', auto_begin=False)
|
||||
|
||||
def begin(self):
|
||||
connection = self.get_thread_connection()
|
||||
|
||||
if dbt.flags.STRICT_MODE:
|
||||
assert isinstance(connection, Connection)
|
||||
|
||||
if connection.transaction_open is True:
|
||||
raise dbt.exceptions.InternalException(
|
||||
'Tried to begin a new transaction on connection "{}", but '
|
||||
'it already had one open!'.format(connection.get('name')))
|
||||
|
||||
self.add_begin_query()
|
||||
|
||||
connection.transaction_open = True
|
||||
return connection
|
||||
|
||||
def commit(self):
|
||||
connection = self.get_thread_connection()
|
||||
if dbt.flags.STRICT_MODE:
|
||||
assert isinstance(connection, Connection)
|
||||
|
||||
if connection.transaction_open is False:
|
||||
raise dbt.exceptions.InternalException(
|
||||
'Tried to commit transaction on connection "{}", but '
|
||||
'it does not have one open!'.format(connection.name))
|
||||
|
||||
logger.debug('On {}: COMMIT'.format(connection.name))
|
||||
self.add_commit_query()
|
||||
|
||||
connection.transaction_open = False
|
||||
|
||||
return connection
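A minimal, hypothetical subclass showing the four methods a concrete manager still has to supply; sqlite3 and the DatabaseException name are illustrative choices, and the connection fields used (handle, state) follow the usage in add_query()/begin() above rather than a contract shown in this diff.

import contextlib
import sqlite3

import dbt.exceptions
from dbt.adapters.sql import SQLConnectionManager


class SQLiteConnectionManager(SQLConnectionManager):
    TYPE = 'sqlite'

    @contextlib.contextmanager
    def exception_handler(self, sql):
        try:
            yield
        except sqlite3.Error as exc:
            raise dbt.exceptions.DatabaseException(str(exc))

    def cancel(self, connection):
        connection.handle.interrupt()      # sqlite3's cancellation hook

    @classmethod
    def get_status(cls, cursor):
        return 'OK'                        # sqlite has no status message

    @classmethod
    def open(cls, connection):
        connection.handle = sqlite3.connect(':memory:')
        connection.state = 'open'
        return connection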
|
||||
222
core/dbt/adapters/sql/impl.py
Normal file
@@ -0,0 +1,222 @@
|
||||
import agate
|
||||
|
||||
import dbt.clients.agate_helper
|
||||
import dbt.exceptions
|
||||
import dbt.flags
|
||||
from dbt.adapters.base import BaseAdapter, available
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
|
||||
|
||||
LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching'
|
||||
GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation'
|
||||
LIST_SCHEMAS_MACRO_NAME = 'list_schemas'
|
||||
CHECK_SCHEMA_EXISTS_MACRO_NAME = 'check_schema_exists'
|
||||
CREATE_SCHEMA_MACRO_NAME = 'create_schema'
|
||||
DROP_SCHEMA_MACRO_NAME = 'drop_schema'
|
||||
RENAME_RELATION_MACRO_NAME = 'rename_relation'
|
||||
TRUNCATE_RELATION_MACRO_NAME = 'truncate_relation'
|
||||
DROP_RELATION_MACRO_NAME = 'drop_relation'
|
||||
ALTER_COLUMN_TYPE_MACRO_NAME = 'alter_column_type'
|
||||
|
||||
|
||||
class SQLAdapter(BaseAdapter):
|
||||
"""The default adapter with the common agate conversions and some SQL
|
||||
methods implemented. This adapter has a much shorter list of methods to
implement, but requires some additional macros to be implemented.
|
||||
|
||||
To implement a macro, implement "${adapter_type}__${macro_name}" in the
|
||||
adapter's internal project.
|
||||
|
||||
Methods to implement:
|
||||
- date_function
|
||||
|
||||
Macros to implement:
|
||||
- get_catalog
|
||||
- list_relations_without_caching
|
||||
- get_columns_in_relation
|
||||
"""
|
||||
@available.parse(lambda *a, **k: (None, None))
|
||||
def add_query(self, sql, auto_begin=True, bindings=None,
|
||||
abridge_sql_log=False):
|
||||
"""Add a query to the current transaction. A thin wrapper around
|
||||
ConnectionManager.add_query.
|
||||
|
||||
:param str sql: The SQL query to add
|
||||
:param bool auto_begin: If set and there is no transaction in progress,
|
||||
begin a new one.
|
||||
:param Optional[List[object]] bindings: An optional list of bindings for the
|
||||
query.
|
||||
:param bool abridge_sql_log: If set, limit the raw sql logged to 512
|
||||
characters
|
||||
"""
|
||||
return self.connections.add_query(sql, auto_begin, bindings,
|
||||
abridge_sql_log)
|
||||
|
||||
@classmethod
|
||||
def convert_text_type(cls, agate_table, col_idx):
|
||||
return "text"
|
||||
|
||||
@classmethod
|
||||
def convert_number_type(cls, agate_table, col_idx):
|
||||
decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
|
||||
return "float8" if decimals else "integer"
|
||||
|
||||
@classmethod
|
||||
def convert_boolean_type(cls, agate_table, col_idx):
|
||||
return "boolean"
|
||||
|
||||
@classmethod
|
||||
def convert_datetime_type(cls, agate_table, col_idx):
|
||||
return "timestamp without time zone"
|
||||
|
||||
@classmethod
|
||||
def convert_date_type(cls, agate_table, col_idx):
|
||||
return "date"
|
||||
|
||||
@classmethod
|
||||
def convert_time_type(cls, agate_table, col_idx):
|
||||
return "time"
|
||||
|
||||
@classmethod
|
||||
def is_cancelable(cls):
|
||||
return True
|
||||
|
||||
def expand_column_types(self, goal, current):
|
||||
reference_columns = {
|
||||
c.name: c for c in
|
||||
self.get_columns_in_relation(goal)
|
||||
}
|
||||
|
||||
target_columns = {
|
||||
c.name: c for c
|
||||
in self.get_columns_in_relation(current)
|
||||
}
|
||||
|
||||
for column_name, reference_column in reference_columns.items():
|
||||
target_column = target_columns.get(column_name)
|
||||
|
||||
if target_column is not None and \
|
||||
target_column.can_expand_to(reference_column):
|
||||
col_string_size = reference_column.string_size()
|
||||
new_type = self.Column.string_type(col_string_size)
|
||||
logger.debug("Changing col type from %s to %s in table %s",
|
||||
target_column.data_type, new_type, current)
|
||||
|
||||
self.alter_column_type(current, column_name, new_type)
|
||||
|
||||
def alter_column_type(self, relation, column_name, new_column_type):
|
||||
"""
|
||||
1. Create a new column (w/ temp name and correct type)
|
||||
2. Copy data over to it
|
||||
3. Drop the existing column (cascade!)
|
||||
4. Rename the new column to existing column
|
||||
"""
|
||||
kwargs = {
|
||||
'relation': relation,
|
||||
'column_name': column_name,
|
||||
'new_column_type': new_column_type,
|
||||
}
|
||||
self.execute_macro(
|
||||
ALTER_COLUMN_TYPE_MACRO_NAME,
|
||||
kwargs=kwargs
|
||||
)
|
||||
|
||||
def drop_relation(self, relation):
|
||||
if dbt.flags.USE_CACHE:
|
||||
self.cache.drop(relation)
|
||||
if relation.type is None:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
'Tried to drop relation {}, but its type is null.'
|
||||
.format(relation))
|
||||
|
||||
self.execute_macro(
|
||||
DROP_RELATION_MACRO_NAME,
|
||||
kwargs={'relation': relation}
|
||||
)
|
||||
|
||||
def truncate_relation(self, relation):
|
||||
self.execute_macro(
|
||||
TRUNCATE_RELATION_MACRO_NAME,
|
||||
kwargs={'relation': relation}
|
||||
)
|
||||
|
||||
def rename_relation(self, from_relation, to_relation):
|
||||
if dbt.flags.USE_CACHE:
|
||||
self.cache.rename(from_relation, to_relation)
|
||||
|
||||
kwargs = {'from_relation': from_relation, 'to_relation': to_relation}
|
||||
self.execute_macro(
|
||||
RENAME_RELATION_MACRO_NAME,
|
||||
kwargs=kwargs
|
||||
)
|
||||
|
||||
def get_columns_in_relation(self, relation):
|
||||
return self.execute_macro(
|
||||
GET_COLUMNS_IN_RELATION_MACRO_NAME,
|
||||
kwargs={'relation': relation}
|
||||
)
|
||||
|
||||
def create_schema(self, database, schema):
|
||||
logger.debug('Creating schema "%s"."%s".', database, schema)
|
||||
kwargs = {
|
||||
'database_name': self.quote_as_configured(database, 'database'),
|
||||
'schema_name': self.quote_as_configured(schema, 'schema'),
|
||||
}
|
||||
self.execute_macro(CREATE_SCHEMA_MACRO_NAME, kwargs=kwargs)
|
||||
self.commit_if_has_connection()
|
||||
|
||||
def drop_schema(self, database, schema):
|
||||
logger.debug('Dropping schema "%s"."%s".', database, schema)
|
||||
kwargs = {
|
||||
'database_name': self.quote_as_configured(database, 'database'),
|
||||
'schema_name': self.quote_as_configured(schema, 'schema'),
|
||||
}
|
||||
self.execute_macro(DROP_SCHEMA_MACRO_NAME,
|
||||
kwargs=kwargs)
|
||||
|
||||
def list_relations_without_caching(self, information_schema, schema):
|
||||
kwargs = {'information_schema': information_schema, 'schema': schema}
|
||||
results = self.execute_macro(
|
||||
LIST_RELATIONS_MACRO_NAME,
|
||||
kwargs=kwargs
|
||||
)
|
||||
|
||||
relations = []
|
||||
quote_policy = {
|
||||
'database': True,
|
||||
'schema': True,
|
||||
'identifier': True
|
||||
}
|
||||
for _database, name, _schema, _type in results:
|
||||
relations.append(self.Relation.create(
|
||||
database=_database,
|
||||
schema=_schema,
|
||||
identifier=name,
|
||||
quote_policy=quote_policy,
|
||||
type=_type
|
||||
))
|
||||
return relations
|
||||
|
||||
def quote(cls, identifier):
|
||||
return '"{}"'.format(identifier)
|
||||
|
||||
def list_schemas(self, database):
|
||||
results = self.execute_macro(
|
||||
LIST_SCHEMAS_MACRO_NAME,
|
||||
kwargs={'database': database}
|
||||
)
|
||||
|
||||
return [row[0] for row in results]
|
||||
|
||||
def check_schema_exists(self, database, schema):
|
||||
information_schema = self.Relation.create(
|
||||
database=database, schema=schema,
|
||||
quote_policy=self.config.quoting
|
||||
).information_schema()
|
||||
|
||||
kwargs = {'information_schema': information_schema, 'schema': schema}
|
||||
results = self.execute_macro(
|
||||
CHECK_SCHEMA_EXISTS_MACRO_NAME,
|
||||
kwargs=kwargs
|
||||
)
|
||||
return results[0][0] > 0
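Building on the hypothetical connection manager sketched after connections.py, a concrete adapter can then be very small; the ConnectionManager attribute name is assumed from dbt's plugin convention and is not shown in this diff.

from dbt.adapters.sql import SQLAdapter


class SQLiteAdapter(SQLAdapter):
    # attribute name assumed, not shown in this diff
    ConnectionManager = SQLiteConnectionManager

    @classmethod
    def date_function(cls):
        # the one method the SQLAdapter docstring above requires
        return "datetime('now')"

The macros named in that docstring and at the top of impl.py (get_catalog, list_relations_without_caching, get_columns_in_relation, and the rest) would be implemented in the plugin's include project as sqlite__<macro_name>, per the naming rule in the docstring.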
|
||||
@@ -1,9 +1,10 @@
|
||||
import copy
|
||||
from collections import Mapping
|
||||
from jsonschema import Draft4Validator
|
||||
from jsonschema import Draft7Validator
|
||||
|
||||
from dbt.exceptions import JSONValidationException
|
||||
from dbt.utils import deep_merge
|
||||
from dbt.clients.system import write_json
|
||||
|
||||
|
||||
class APIObject(Mapping):
|
||||
@@ -61,6 +62,9 @@ class APIObject(Mapping):
|
||||
"""
|
||||
return copy.deepcopy(self._contents)
|
||||
|
||||
def write(self, path):
|
||||
write_json(path, self.serialize())
|
||||
|
||||
@classmethod
|
||||
def deserialize(cls, settings):
|
||||
"""
|
||||
@@ -75,7 +79,7 @@ class APIObject(Mapping):
|
||||
of this instance. If any attributes are missing or
|
||||
invalid, raise a ValidationException.
|
||||
"""
|
||||
validator = Draft4Validator(self.SCHEMA)
|
||||
validator = Draft7Validator(self.SCHEMA)
|
||||
|
||||
errors = set() # make errors a set to avoid duplicates
|
||||
|
||||
371
core/dbt/clients/_jinja_blocks.py
Normal file
@@ -0,0 +1,371 @@
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
import dbt.exceptions
|
||||
|
||||
|
||||
def regex(pat):
|
||||
return re.compile(pat, re.DOTALL | re.MULTILINE)
|
||||
|
||||
|
||||
class BlockData(object):
|
||||
"""raw plaintext data from the top level of the file."""
|
||||
def __init__(self, contents):
|
||||
self.block_type_name = '__dbt__data'
|
||||
self.contents = contents
|
||||
self.full_block = contents
|
||||
|
||||
|
||||
class BlockTag(object):
|
||||
def __init__(self, block_type_name, block_name, contents=None,
|
||||
full_block=None, **kw):
|
||||
self.block_type_name = block_type_name
|
||||
self.block_name = block_name
|
||||
self.contents = contents
|
||||
self.full_block = full_block
|
||||
|
||||
def __str__(self):
|
||||
return 'BlockTag({!r}, {!r})'.format(self.block_type_name,
|
||||
self.block_name)
|
||||
|
||||
def __repr__(self):
|
||||
return str(self)
|
||||
|
||||
@property
|
||||
def end_block_type_name(self):
|
||||
return 'end{}'.format(self.block_type_name)
|
||||
|
||||
def end_pat(self):
|
||||
# we don't want to use string formatting here because jinja uses most
|
||||
# of the string formatting operators in its syntax...
|
||||
pattern = ''.join((
|
||||
r'(?P<endblock>((?:\s*\{\%\-|\{\%)\s*',
|
||||
self.end_block_type_name,
|
||||
r'\s*(?:\-\%\}\s*|\%\})))',
|
||||
))
|
||||
return regex(pattern)
|
||||
|
||||
|
||||
Tag = namedtuple('Tag', 'block_type_name block_name start end')
|
||||
|
||||
|
||||
_NAME_PATTERN = r'[A-Za-z_][A-Za-z_0-9]*'
|
||||
|
||||
COMMENT_START_PATTERN = regex(r'(?:(?P<comment_start>(\s*\{\#)))')
|
||||
COMMENT_END_PATTERN = regex(r'(.*?)(\s*\#\})')
|
||||
RAW_START_PATTERN = regex(
|
||||
r'(?:\s*\{\%\-|\{\%)\s*(?P<raw_start>(raw))\s*(?:\-\%\}\s*|\%\})'
|
||||
)
|
||||
EXPR_START_PATTERN = regex(r'(?P<expr_start>(\{\{\s*))')
|
||||
EXPR_END_PATTERN = regex(r'(?P<expr_end>(\s*\}\}))')
|
||||
|
||||
BLOCK_START_PATTERN = regex(''.join((
|
||||
r'(?:\s*\{\%\-|\{\%)\s*',
|
||||
r'(?P<block_type_name>({}))'.format(_NAME_PATTERN),
|
||||
# some blocks have a 'block name'.
|
||||
r'(?:\s+(?P<block_name>({})))?'.format(_NAME_PATTERN),
|
||||
)))
|
||||
|
||||
|
||||
RAW_BLOCK_PATTERN = regex(''.join((
|
||||
r'(?:\s*\{\%\-|\{\%)\s*raw\s*(?:\-\%\}\s*|\%\})',
|
||||
r'(?:.*)',
|
||||
r'(?:\s*\{\%\-|\{\%)\s*endraw\s*(?:\-\%\}\s*|\%\})',
|
||||
)))
|
||||
|
||||
TAG_CLOSE_PATTERN = regex(r'(?:(?P<tag_close>(\-\%\}\s*|\%\})))')
|
||||
|
||||
# stolen from jinja's lexer. Note that we've consumed all prefix whitespace by
|
||||
# the time we want to use this.
|
||||
STRING_PATTERN = regex(
|
||||
r"(?P<string>('([^'\\]*(?:\\.[^'\\]*)*)'|"
|
||||
r'"([^"\\]*(?:\\.[^"\\]*)*)"))'
|
||||
)
|
||||
|
||||
QUOTE_START_PATTERN = regex(r'''(?P<quote>(['"]))''')
|
||||
|
||||
|
||||
class TagIterator(object):
|
||||
def __init__(self, data):
|
||||
self.data = data
|
||||
self.blocks = []
|
||||
self._parenthesis_stack = []
|
||||
self.pos = 0
|
||||
|
||||
def advance(self, new_position):
|
||||
self.pos = new_position
|
||||
|
||||
def rewind(self, amount=1):
|
||||
self.pos -= amount
|
||||
|
||||
def _search(self, pattern):
|
||||
return pattern.search(self.data, self.pos)
|
||||
|
||||
def _match(self, pattern):
|
||||
return pattern.match(self.data, self.pos)
|
||||
|
||||
def _first_match(self, *patterns, **kwargs):
|
||||
matches = []
|
||||
for pattern in patterns:
|
||||
# default to 'search', but sometimes we want to 'match'.
|
||||
if kwargs.get('method', 'search') == 'search':
|
||||
match = self._search(pattern)
|
||||
else:
|
||||
match = self._match(pattern)
|
||||
if match:
|
||||
matches.append(match)
|
||||
if not matches:
|
||||
return None
|
||||
# if there are multiple matches, pick the least greedy match
|
||||
# TODO: do I need to account for m.start(), or is this ok?
|
||||
return min(matches, key=lambda m: m.end())
|
||||
|
||||
def _expect_match(self, expected_name, *patterns, **kwargs):
|
||||
match = self._first_match(*patterns, **kwargs)
|
||||
if match is None:
|
||||
msg = 'unexpected EOF, expected {}, got "{}"'.format(
|
||||
expected_name, self.data[self.pos:]
|
||||
)
|
||||
dbt.exceptions.raise_compiler_error(msg)
|
||||
return match
|
||||
|
||||
def handle_expr(self, match):
|
||||
"""Handle an expression. At this point we're at a string like:
|
||||
{{ 1 + 2 }}
|
||||
^ right here
|
||||
|
||||
And the match contains "{{ "
|
||||
|
||||
We expect to find a `}}`, but we might find one in a string before
|
||||
that. Imagine the case of `{{ 2 * "}}" }}`...
|
||||
|
||||
You're not allowed to have blocks or comments inside an expr so it is
|
||||
pretty straightforward, I hope: only strings can get in the way.
|
||||
"""
|
||||
self.advance(match.end())
|
||||
while True:
|
||||
match = self._expect_match('}}',
|
||||
EXPR_END_PATTERN,
|
||||
QUOTE_START_PATTERN)
|
||||
if match.groupdict().get('expr_end') is not None:
|
||||
break
|
||||
else:
|
||||
# it's a quote. we haven't advanced for this match yet, so
|
||||
# just slurp up the whole string, no need to rewind.
|
||||
match = self._expect_match('string', STRING_PATTERN)
|
||||
self.advance(match.end())
|
||||
|
||||
self.advance(match.end())
|
||||
|
||||
def handle_comment(self, match):
|
||||
self.advance(match.end())
|
||||
match = self._expect_match('#}', COMMENT_END_PATTERN)
|
||||
self.advance(match.end())
|
||||
|
||||
def _expect_block_close(self):
|
||||
"""Search for the tag close marker.
|
||||
To the right of the type name, there are a few possibilities:
|
||||
- a name (handled by the regex's 'block_name')
|
||||
- any number of: `=`, `(`, `)`, strings, etc (arguments)
|
||||
- nothing
|
||||
|
||||
followed eventually by a %}
|
||||
|
||||
So the only characters we actually have to worry about in this context
|
||||
are quotes and `%}` - nothing else can hide the %} and be valid jinja.
|
||||
"""
|
||||
while True:
|
||||
end_match = self._expect_match(
|
||||
'tag close ("%}")',
|
||||
QUOTE_START_PATTERN,
|
||||
TAG_CLOSE_PATTERN
|
||||
)
|
||||
self.advance(end_match.end())
|
||||
if end_match.groupdict().get('tag_close') is not None:
|
||||
return
|
||||
# must be a string. Rewind to its start and advance past it.
|
||||
self.rewind()
|
||||
string_match = self._expect_match('string', STRING_PATTERN)
|
||||
self.advance(string_match.end())
|
||||
|
||||
def handle_raw(self):
|
||||
# raw blocks are super special, they are a single complete regex
|
||||
match = self._expect_match('{% raw %}...{% endraw %}',
|
||||
RAW_BLOCK_PATTERN)
|
||||
self.advance(match.end())
|
||||
return match.end()
|
||||
|
||||
def handle_tag(self, match):
|
||||
"""The tag could be one of a few things:
|
||||
|
||||
{% mytag %}
|
||||
{% mytag x = y %}
|
||||
{% mytag x = "y" %}
|
||||
{% mytag x.y() %}
|
||||
{% mytag foo("a", "b", c="d") %}
|
||||
|
||||
But the key here is that it's always going to be `{% mytag`!
|
||||
"""
|
||||
groups = match.groupdict()
|
||||
# always a value
|
||||
block_type_name = groups['block_type_name']
|
||||
# might be None
|
||||
block_name = groups.get('block_name')
|
||||
start_pos = self.pos
|
||||
if block_type_name == 'raw':
|
||||
match = self._expect_match('{% raw %}...{% endraw %}',
|
||||
RAW_BLOCK_PATTERN)
|
||||
self.advance(match.end())
|
||||
else:
|
||||
self.advance(match.end())
|
||||
self._expect_block_close()
|
||||
return Tag(
|
||||
block_type_name=block_type_name,
|
||||
block_name=block_name,
|
||||
start=start_pos,
|
||||
end=self.pos
|
||||
)
|
||||
|
||||
def find_tags(self):
|
||||
while True:
|
||||
match = self._first_match(
|
||||
BLOCK_START_PATTERN,
|
||||
COMMENT_START_PATTERN,
|
||||
EXPR_START_PATTERN
|
||||
)
|
||||
if match is None:
|
||||
break
|
||||
|
||||
self.advance(match.start())
|
||||
# start = self.pos
|
||||
|
||||
groups = match.groupdict()
|
||||
comment_start = groups.get('comment_start')
|
||||
expr_start = groups.get('expr_start')
|
||||
block_type_name = groups.get('block_type_name')
|
||||
|
||||
if comment_start is not None:
|
||||
self.handle_comment(match)
|
||||
elif expr_start is not None:
|
||||
self.handle_expr(match)
|
||||
elif block_type_name is not None:
|
||||
yield self.handle_tag(match)
|
||||
else:
|
||||
raise dbt.exceptions.InternalException(
|
||||
'Invalid regex match in next_block, expected block start, '
|
||||
'expr start, or comment start'
|
||||
)
|
||||
|
||||
def __iter__(self):
|
||||
return self.find_tags()
|
||||
|
||||
|
||||
duplicate_tags = (
|
||||
'Got nested tags: {outer.block_type_name} (started at {outer.start}) did '
|
||||
'not have a matching {{% end{outer.block_type_name} %}} before a '
|
||||
'subsequent {inner.block_type_name} was found (started at {inner.start})'
|
||||
)
|
||||
|
||||
|
||||
_CONTROL_FLOW_TAGS = {
|
||||
'if': 'endif',
|
||||
'for': 'endfor',
|
||||
}
|
||||
|
||||
_CONTROL_FLOW_END_TAGS = {
|
||||
v: k
|
||||
for k, v in _CONTROL_FLOW_TAGS.items()
|
||||
}
|
||||
|
||||
|
||||
class BlockIterator(object):
|
||||
def __init__(self, data):
|
||||
self.tag_parser = TagIterator(data)
|
||||
self.current = None
|
||||
self.stack = []
|
||||
self.last_position = 0
|
||||
|
||||
@property
|
||||
def current_end(self):
|
||||
if self.current is None:
|
||||
return 0
|
||||
else:
|
||||
return self.current.end
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
return self.tag_parser.data
|
||||
|
||||
def is_current_end(self, tag):
|
||||
return (
|
||||
tag.block_type_name.startswith('end') and
|
||||
self.current is not None and
|
||||
tag.block_type_name[3:] == self.current.block_type_name
|
||||
)
|
||||
|
||||
def find_blocks(self, allowed_blocks=None, collect_raw_data=True):
|
||||
"""Find all top-level blocks in the data."""
|
||||
if allowed_blocks is None:
|
||||
allowed_blocks = {'snapshot', 'macro', 'materialization', 'docs'}
|
||||
|
||||
for tag in self.tag_parser.find_tags():
|
||||
if tag.block_type_name in _CONTROL_FLOW_TAGS:
|
||||
self.stack.append(tag.block_type_name)
|
||||
elif tag.block_type_name in _CONTROL_FLOW_END_TAGS:
|
||||
found = None
|
||||
if self.stack:
|
||||
found = self.stack.pop()
|
||||
else:
|
||||
expected = _CONTROL_FLOW_END_TAGS[tag.block_type_name]
|
||||
dbt.exceptions.raise_compiler_error((
|
||||
'Got an unexpected control flow end tag, got {} but '
|
||||
'never saw a preceding {} (@ {})'
|
||||
).format(tag.block_type_name, expected, tag.start))
|
||||
expected = _CONTROL_FLOW_TAGS[found]
|
||||
if expected != tag.block_type_name:
|
||||
dbt.exceptions.raise_compiler_error((
|
||||
'Got an unexpected control flow end tag, got {} but '
|
||||
'expected {} next (@ {})'
|
||||
).format(tag.block_type_name, expected, tag.start))
|
||||
|
||||
if tag.block_type_name in allowed_blocks:
|
||||
if self.stack:
|
||||
dbt.exceptions.raise_compiler_error((
|
||||
'Got a block definition inside control flow at {}. '
|
||||
'All dbt block definitions must be at the top level'
|
||||
).format(tag.start))
|
||||
if self.current is not None:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
duplicate_tags.format(outer=self.current, inner=tag)
|
||||
)
|
||||
if collect_raw_data:
|
||||
raw_data = self.data[self.last_position:tag.start]
|
||||
self.last_position = tag.start
|
||||
if raw_data:
|
||||
yield BlockData(raw_data)
|
||||
self.current = tag
|
||||
|
||||
elif self.is_current_end(tag):
|
||||
self.last_position = tag.end
|
||||
yield BlockTag(
|
||||
block_type_name=self.current.block_type_name,
|
||||
block_name=self.current.block_name,
|
||||
contents=self.data[self.current.end:tag.start],
|
||||
full_block=self.data[self.current.start:tag.end]
|
||||
)
|
||||
self.current = None
|
||||
|
||||
if self.current:
|
||||
dbt.exceptions.raise_compiler_error((
|
||||
'Reached EOF without finding a close block for '
|
||||
'{0.block_type_name} (from {0.end})'
|
||||
).format(self.current))
|
||||
|
||||
if collect_raw_data:
|
||||
raw_data = self.data[self.last_position:]
|
||||
if raw_data:
|
||||
yield BlockData(raw_data)
|
||||
|
||||
def lex_for_blocks(self, allowed_blocks=None, collect_raw_data=True):
|
||||
return list(self.find_blocks(allowed_blocks=allowed_blocks,
|
||||
collect_raw_data=collect_raw_data))
|
||||
@@ -1,5 +1,7 @@
|
||||
import dbt.compat
|
||||
|
||||
import agate
|
||||
import json
|
||||
|
||||
DEFAULT_TYPE_TESTER = agate.TypeTester(types=[
|
||||
agate.data_types.Number(null_values=('null', '')),
|
||||
@@ -28,6 +30,22 @@ def table_from_data(data, column_names):
|
||||
return table.select(column_names)
|
||||
|
||||
|
||||
def table_from_data_flat(data, column_names):
|
||||
"Convert list of dictionaries into an Agate table"
|
||||
|
||||
rows = []
|
||||
for _row in data:
|
||||
row = []
|
||||
for value in list(_row.values()):
|
||||
if isinstance(value, (dict, list, tuple)):
|
||||
row.append(json.dumps(value))
|
||||
else:
|
||||
row.append(value)
|
||||
rows.append(row)
|
||||
|
||||
return agate.Table(rows, column_names)
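For example (made-up input), nested values are JSON-encoded rather than handed to agate directly:

# Illustrative only.
rows = [
    {'id': 1, 'tags': ['a', 'b']},
    {'id': 2, 'tags': []},
]
table = table_from_data_flat(rows, ['id', 'tags'])
# the 'tags' column now holds the strings '["a", "b"]' and '[]'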
|
||||
|
||||
|
||||
def empty_table():
|
||||
"Returns an empty Agate table. To be used in place of None"
|
||||
|
||||
@@ -41,4 +59,7 @@ def as_matrix(table):
|
||||
|
||||
|
||||
def from_csv(abspath):
|
||||
return agate.Table.from_csv(abspath, column_types=DEFAULT_TYPE_TESTER)
|
||||
with dbt.compat.open_seed_file(abspath) as fp:
|
||||
if fp.read(len(dbt.compat.BOM_UTF8)) != dbt.compat.BOM_UTF8:
|
||||
fp.seek(0)
|
||||
return agate.Table.from_csv(fp, column_types=DEFAULT_TYPE_TESTER)
|
||||
@@ -12,7 +12,7 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False):
|
||||
if dirname is not None:
|
||||
clone_cmd.append(dirname)
|
||||
|
||||
result = run_cmd(cwd, clone_cmd)
|
||||
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
|
||||
|
||||
if remove_git_dir:
|
||||
rmdir(os.path.join(dirname, '.git'))
|
||||
@@ -21,15 +21,12 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False):
|
||||
|
||||
|
||||
def list_tags(cwd):
|
||||
out, err = run_cmd(cwd, ['git', 'tag', '--list'])
|
||||
out, err = run_cmd(cwd, ['git', 'tag', '--list'], env={'LC_ALL': 'C'})
|
||||
tags = out.decode('utf-8').strip().split("\n")
|
||||
return tags
|
||||
|
||||
|
||||
def checkout(cwd, repo, branch=None):
|
||||
if branch is None:
|
||||
branch = 'master'
|
||||
|
||||
def _checkout(cwd, repo, branch):
|
||||
logger.debug(' Checking out branch {}.'.format(branch))
|
||||
|
||||
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch])
|
||||
@@ -43,30 +40,43 @@ def checkout(cwd, repo, branch=None):
|
||||
else:
|
||||
spec = 'origin/{}'.format(branch)
|
||||
|
||||
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec])
|
||||
stderr = err.decode('utf-8').strip()
|
||||
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec],
|
||||
env={'LC_ALL': 'C'})
|
||||
return out, err
|
||||
|
||||
if stderr.startswith('fatal:'):
|
||||
dbt.exceptions.bad_package_spec(repo, branch, stderr)
|
||||
else:
|
||||
return out, err
|
||||
|
||||
def checkout(cwd, repo, branch=None):
|
||||
if branch is None:
|
||||
branch = 'master'
|
||||
try:
|
||||
return _checkout(cwd, repo, branch)
|
||||
except dbt.exceptions.CommandResultError as exc:
|
||||
stderr = exc.stderr.decode('utf-8').strip()
|
||||
dbt.exceptions.bad_package_spec(repo, branch, stderr)
|
||||
|
||||
|
||||
def get_current_sha(cwd):
|
||||
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])
|
||||
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'], env={'LC_ALL': 'C'})
|
||||
|
||||
return out.decode('utf-8')
|
||||
|
||||
|
||||
def remove_remote(cwd):
|
||||
return run_cmd(cwd, ['git', 'remote', 'rm', 'origin'])
|
||||
return run_cmd(cwd, ['git', 'remote', 'rm', 'origin'], env={'LC_ALL': 'C'})
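The env={'LC_ALL': 'C'} additions above matter because clone_and_checkout below parses git's stderr as text; forcing the C locale keeps those messages in untranslated English, so a check like this (a sketch, not code from this diff) stays reliable regardless of the user's locale:

import re

# git's untranslated message when the clone target already exists:
stderr = ("fatal: destination path 'dbt_utils' already exists and is not "
          "an empty directory.")
exists = re.match("fatal: destination path '(.+)' already exists", stderr)
assert exists is not None and exists.group(1) == 'dbt_utils'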
|
||||
|
||||
|
||||
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
|
||||
branch=None):
|
||||
_, err = clone(repo, cwd, dirname=dirname, remove_git_dir=remove_git_dir)
|
||||
exists = re.match("fatal: destination path '(.+)' already exists",
|
||||
err.decode('utf-8'))
|
||||
exists = None
|
||||
try:
|
||||
_, err = clone(repo, cwd, dirname=dirname,
|
||||
remove_git_dir=remove_git_dir)
|
||||
except dbt.exceptions.CommandResultError as exc:
|
||||
err = exc.stderr.decode('utf-8')
|
||||
exists = re.match("fatal: destination path '(.+)' already exists", err)
|
||||
if not exists: # something else is wrong, raise it
|
||||
raise
|
||||
|
||||
directory = None
|
||||
start_sha = None
|
||||
if exists:
|
||||
@@ -1,6 +1,7 @@
|
||||
import codecs
|
||||
import linecache
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import jinja2
|
||||
import jinja2._compat
|
||||
@@ -11,13 +12,41 @@ import jinja2.sandbox
|
||||
|
||||
import dbt.compat
|
||||
import dbt.exceptions
|
||||
import dbt.utils
|
||||
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.utils import AttrDict
|
||||
from dbt.clients._jinja_blocks import BlockIterator
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
|
||||
|
||||
def _linecache_inject(source, write):
|
||||
if write:
|
||||
# this is the only reliable way to accomplish this. Obviously, it's
|
||||
# really darn noisy and will fill your temporary directory
|
||||
tmp_file = tempfile.NamedTemporaryFile(
|
||||
prefix='dbt-macro-compiled-',
|
||||
suffix='.py',
|
||||
delete=False,
|
||||
mode='w+',
|
||||
encoding='utf-8',
|
||||
)
|
||||
tmp_file.write(source)
|
||||
filename = tmp_file.name
|
||||
else:
|
||||
filename = codecs.encode(os.urandom(12), 'hex').decode('ascii')
|
||||
|
||||
# encode, though I don't think this matters
|
||||
filename = jinja2._compat.encode_filename(filename)
|
||||
# put ourselves in the cache
|
||||
linecache.cache[filename] = (
|
||||
len(source),
|
||||
None,
|
||||
[line + '\n' for line in source.splitlines()],
|
||||
filename
|
||||
)
|
||||
return filename
|
||||
|
||||
|
||||
class MacroFuzzParser(jinja2.parser.Parser):
|
||||
def parse_macro(self):
|
||||
node = jinja2.nodes.Macro(lineno=next(self.stream).lineno)
|
||||
@@ -43,17 +72,16 @@ class MacroFuzzEnvironment(jinja2.sandbox.SandboxedEnvironment):
|
||||
|
||||
def _compile(self, source, filename):
|
||||
"""Override jinja's compilation to stash the rendered source inside
|
||||
the python linecache for debugging.
|
||||
the python linecache for debugging when the appropriate environment
|
||||
variable is set.
|
||||
|
||||
If the value is 'write', also write the files to disk.
|
||||
WARNING: This can write a ton of data if you aren't careful.
|
||||
"""
|
||||
if filename == '<template>':
|
||||
# make a better filename
|
||||
filename = 'dbt-{}'.format(
|
||||
codecs.encode(os.urandom(12), 'hex').decode('ascii')
|
||||
)
|
||||
# encode, though I don't think this matters
|
||||
filename = jinja2._compat.encode_filename(filename)
|
||||
# put ourselves in the cache using the 'lazycache' method
|
||||
linecache.cache[filename] = (lambda: source,)
|
||||
macro_compile = os.environ.get('DBT_MACRO_DEBUGGING')
|
||||
if filename == '<template>' and macro_compile:
|
||||
write = macro_compile == 'write'
|
||||
filename = _linecache_inject(source, write)
|
||||
|
||||
return super(MacroFuzzEnvironment, self)._compile(source, filename)
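In other words, compiled macro sources are only stashed when the DBT_MACRO_DEBUGGING environment variable is set; a small sketch of opting in for a debugging session:

import os

# Illustrative: set before dbt compiles any templates.
os.environ['DBT_MACRO_DEBUGGING'] = '1'        # keep sources in linecache only
# os.environ['DBT_MACRO_DEBUGGING'] = 'write'  # also write dbt-macro-compiled-*.py
                                               # files to the temp directory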
|
||||
|
||||
@@ -92,10 +120,7 @@ def macro_generator(node):
|
||||
template = template_cache.get_node_template(node)
|
||||
module = template.make_module(context, False, context)
|
||||
|
||||
if node['resource_type'] == NodeType.Operation:
|
||||
macro = module.__dict__[dbt.utils.get_dbt_operation_name(name)]
|
||||
else:
|
||||
macro = module.__dict__[dbt.utils.get_dbt_macro_name(name)]
|
||||
macro = module.__dict__[dbt.utils.get_dbt_macro_name(name)]
|
||||
module.__dict__.update(context)
|
||||
|
||||
try:
|
||||
@@ -148,28 +173,6 @@ class MaterializationExtension(jinja2.ext.Extension):
|
||||
return node
|
||||
|
||||
|
||||
class OperationExtension(jinja2.ext.Extension):
|
||||
tags = ['operation']
|
||||
|
||||
def parse(self, parser):
|
||||
node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
|
||||
operation_name = \
|
||||
parser.parse_assign_target(name_only=True).name
|
||||
|
||||
node.args = []
|
||||
node.defaults = []
|
||||
|
||||
while parser.stream.skip_if('comma'):
|
||||
target = parser.parse_assign_target(name_only=True)
|
||||
|
||||
node.name = dbt.utils.get_operation_macro_name(operation_name)
|
||||
|
||||
node.body = parser.parse_statements(('name:endoperation',),
|
||||
drop_needle=True)
|
||||
|
||||
return node
|
||||
|
||||
|
||||
class DocumentationExtension(jinja2.ext.Extension):
|
||||
tags = ['docs']
|
||||
|
||||
@@ -239,21 +242,20 @@ def create_macro_capture_env(node):
|
||||
return self
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return True
|
||||
return self
|
||||
|
||||
return ParserMacroCapture
|
||||
|
||||
|
||||
def get_environment(node=None, capture_macros=False):
|
||||
args = {
|
||||
'extensions': []
|
||||
'extensions': ['jinja2.ext.do']
|
||||
}
|
||||
|
||||
if capture_macros:
|
||||
args['undefined'] = create_macro_capture_env(node)
|
||||
|
||||
args['extensions'].append(MaterializationExtension)
|
||||
args['extensions'].append(OperationExtension)
|
||||
args['extensions'].append(DocumentationExtension)
|
||||
|
||||
return MacroFuzzEnvironment(**args)
|
||||
@@ -302,3 +304,25 @@ def get_rendered(string, ctx, node=None,
|
||||
|
||||
def undefined_error(msg):
|
||||
raise jinja2.exceptions.UndefinedError(msg)
|
||||
|
||||
|
||||
def extract_toplevel_blocks(data, allowed_blocks=None, collect_raw_data=True):
|
||||
"""Extract the top level blocks with matching block types from a jinja
|
||||
file, with some special handling for block nesting.
|
||||
|
||||
:param str data: The data to extract blocks from.
|
||||
:param Optional[Set[str]] allowed_blocks: The names of the blocks to
|
||||
extract from the file. They may not be nested within if/for blocks.
|
||||
If None, use the default values.
|
||||
:param bool collect_raw_data: If set, raw data between matched blocks will
|
||||
also be part of the results, as `BlockData` objects. They have a
|
||||
`block_type_name` field of `'__dbt__data'` and will never have a
|
||||
`block_name`.
|
||||
:return List[Union[BlockData, BlockTag]]: A list of `BlockTag`s matching
|
||||
the allowed block types and (if `collect_raw_data` is `True`)
|
||||
`BlockData` objects.
|
||||
"""
|
||||
return BlockIterator(data).lex_for_blocks(
|
||||
allowed_blocks=allowed_blocks,
|
||||
collect_raw_data=collect_raw_data
|
||||
)
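A small usage sketch of the extractor; the input template is made up.

# Illustrative only.
raw = (
    "{% docs orders %}One row per order.{% enddocs %}\n"
    "select 1 as id\n"
    "{% macro greet(name) %}hello {{ name }}{% endmacro %}\n"
)
for block in extract_toplevel_blocks(raw, allowed_blocks={'docs', 'macro'}):
    print(block.block_type_name, getattr(block, 'block_name', None))
# docs orders
# __dbt__data None     (the "select 1 as id" text between the two blocks)
# macro greet
# __dbt__data None     (the trailing newline)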
|
||||
@@ -3,7 +3,9 @@ import six
|
||||
import requests
|
||||
from dbt.exceptions import RegistryException
|
||||
from dbt.utils import memoized
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
import os
|
||||
import time
|
||||
|
||||
if os.getenv('DBT_PACKAGE_HUB_URL'):
|
||||
DEFAULT_REGISTRY_BASE_URL = os.getenv('DBT_PACKAGE_HUB_URL')
|
||||
@@ -21,18 +23,30 @@ def _get_url(url, registry_base_url=None):
|
||||
def _wrap_exceptions(fn):
|
||||
@wraps(fn)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
return fn(*args, **kwargs)
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
six.raise_from(
|
||||
RegistryException('Unable to connect to registry hub'), e)
|
||||
max_attempts = 5
|
||||
attempt = 0
|
||||
while True:
|
||||
attempt += 1
|
||||
try:
|
||||
return fn(*args, **kwargs)
|
||||
except requests.exceptions.ConnectionError as exc:
|
||||
if attempt < max_attempts:
|
||||
time.sleep(1)
|
||||
continue
|
||||
six.raise_from(
|
||||
RegistryException('Unable to connect to registry hub'),
|
||||
exc
|
||||
)
|
||||
return wrapper
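The retry behaviour above amounts to: up to five attempts, one second apart, with only the final ConnectionError converted into a RegistryException. A hypothetical use outside the registry helpers below would look like:

# Illustrative only; flaky_fetch is a made-up function.
@_wrap_exceptions
def flaky_fetch(url):
    return requests.get(url).json()

# flaky_fetch(...) now survives up to four transient connection failures
# before raising RegistryException.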
|
||||
|
||||
|
||||
@_wrap_exceptions
|
||||
def _get(path, registry_base_url=None):
|
||||
url = _get_url(path, registry_base_url)
|
||||
logger.debug('Making package registry request: GET {}'.format(url))
|
||||
resp = requests.get(url)
|
||||
logger.debug('Response from registry: GET {} {}'.format(url,
|
||||
resp.status_code))
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
402
core/dbt/clients/system.py
Normal file
@@ -0,0 +1,402 @@
import errno
import fnmatch
import json
import os
import os.path
import shutil
import subprocess
import sys
import tarfile
import requests
import stat

import dbt.compat
import dbt.exceptions
import dbt.utils

from dbt.logger import GLOBAL_LOGGER as logger


def find_matching(root_path,
                  relative_paths_to_search,
                  file_pattern):
    """
    Given an absolute `root_path`, a list of relative paths to that
    absolute root path (`relative_paths_to_search`), and a `file_pattern`
    like '*.sql', returns information about the files. For example:

    > find_matching('/root/path', 'models', '*.sql')

    [ { 'absolute_path': '/root/path/models/model_one.sql',
        'relative_path': 'models/model_one.sql',
        'searched_path': 'models' },
      { 'absolute_path': '/root/path/models/subdirectory/model_two.sql',
        'relative_path': 'models/subdirectory/model_two.sql',
        'searched_path': 'models' } ]
    """
    matching = []
    root_path = os.path.normpath(root_path)

    for relative_path_to_search in relative_paths_to_search:
        absolute_path_to_search = os.path.join(
            root_path, relative_path_to_search)
        walk_results = os.walk(absolute_path_to_search)

        for current_path, subdirectories, local_files in walk_results:
            for local_file in local_files:
                absolute_path = os.path.join(current_path, local_file)
                relative_path = os.path.relpath(
                    absolute_path, absolute_path_to_search)

                if fnmatch.fnmatch(local_file, file_pattern):
                    matching.append({
                        'searched_path': relative_path_to_search,
                        'absolute_path': absolute_path,
                        'relative_path': relative_path,
                    })

    return matching


def load_file_contents(path, strip=True):
    with open(path, 'rb') as handle:
        to_return = handle.read().decode('utf-8')

    if strip:
        to_return = to_return.strip()

    return to_return


def make_directory(path):
    """
    Make a directory and any intermediate directories that don't already
    exist. This function handles the case where two threads try to create
    a directory at once.
    """
    if not os.path.exists(path):
        # concurrent writes that try to create the same dir can fail
        try:
            os.makedirs(path)

        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise e


def make_file(path, contents='', overwrite=False):
    """
    Make a file at `path` assuming that the directory it resides in already
    exists. The file is saved with contents `contents`
    """
    if overwrite or not os.path.exists(path):
        with open(path, 'w') as fh:
            fh.write(contents)
        return True

    return False


def make_symlink(source, link_path):
    """
    Create a symlink at `link_path` referring to `source`.
    """
    if not supports_symlinks():
        dbt.exceptions.system_error('create a symbolic link')

    return os.symlink(source, link_path)


def supports_symlinks():
    return getattr(os, "symlink", None) is not None


def write_file(path, contents=''):
    make_directory(os.path.dirname(path))
    dbt.compat.write_file(path, contents)

    return True


def write_json(path, data):
    return write_file(path, json.dumps(data, cls=dbt.utils.JSONEncoder))


def _windows_rmdir_readonly(func, path, exc):
    exception_val = exc[1]
    if exception_val.errno == errno.EACCES:
        os.chmod(path, stat.S_IWUSR)
        func(path)
    else:
        raise


def resolve_path_from_base(path_to_resolve, base_path):
    """
    If path_to_resolve is a relative path, create an absolute path
    with base_path as the base.

    If path_to_resolve is an absolute path or a user path (~), just
    resolve it to an absolute path and return.
    """
    return os.path.abspath(
        os.path.join(
            base_path,
            os.path.expanduser(path_to_resolve)))


def rmdir(path):
    """
    Recursively deletes a directory. Includes an error handler to retry with
    different permissions on Windows. Otherwise, removing directories (eg.
    cloned via git) can cause rmtree to throw a PermissionError exception
    """
    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
    if sys.platform == 'win32':
        onerror = _windows_rmdir_readonly
    else:
        onerror = None

    return shutil.rmtree(path, onerror=onerror)


def remove_file(path):
    return os.remove(path)


def path_exists(path):
    return os.path.lexists(path)


def path_is_symlink(path):
    return os.path.islink(path)


def open_dir_cmd():
    # https://docs.python.org/2/library/sys.html#sys.platform
    if sys.platform == 'win32':
        return 'start'

    elif sys.platform == 'darwin':
        return 'open'

    else:
        return 'xdg-open'


def _handle_posix_cwd_error(exc, cwd, cmd):
    if exc.errno == errno.ENOENT:
        message = 'Directory does not exist'
    elif exc.errno == errno.EACCES:
        message = 'Current user cannot access directory, check permissions'
    elif exc.errno == errno.ENOTDIR:
        message = 'Not a directory'
    else:
        message = 'Unknown OSError: {} - cwd'.format(str(exc))
    raise dbt.exceptions.WorkingDirectoryError(cwd, cmd, message)


def _handle_posix_cmd_error(exc, cwd, cmd):
    if exc.errno == errno.ENOENT:
        message = "Could not find command, ensure it is in the user's PATH"
    elif exc.errno == errno.EACCES:
        message = 'User does not have permissions for this command'
    else:
        message = 'Unknown OSError: {} - cmd'.format(str(exc))
    raise dbt.exceptions.ExecutableError(cwd, cmd, message)


def _handle_posix_error(exc, cwd, cmd):
    """OSError handling for posix systems.

    Some things that could happen to trigger an OSError:
    - cwd could not exist
        - exc.errno == ENOENT
        - exc.filename == cwd
    - cwd could have permissions that prevent the current user moving to it
        - exc.errno == EACCES
        - exc.filename == cwd
    - cwd could exist but not be a directory
        - exc.errno == ENOTDIR
        - exc.filename == cwd
    - cmd[0] could not exist
        - exc.errno == ENOENT
        - exc.filename == None(?)
    - cmd[0] could exist but have permissions that prevents the current
      user from executing it (executable bit not set for the user)
        - exc.errno == EACCES
        - exc.filename == None(?)
    """
    if getattr(exc, 'filename', None) == cwd:
        _handle_posix_cwd_error(exc, cwd, cmd)
    else:
        _handle_posix_cmd_error(exc, cwd, cmd)


def _handle_windows_error(exc, cwd, cmd):
    cls = dbt.exceptions.CommandError
    if exc.errno == errno.ENOENT:
        message = ("Could not find command, ensure it is in the user's PATH "
                   "and that the user has permissions to run it")
        cls = dbt.exceptions.ExecutableError
    elif exc.errno == errno.ENOEXEC:
        message = ('Command was not executable, ensure it is valid')
        cls = dbt.exceptions.ExecutableError
    elif exc.errno == errno.ENOTDIR:
        message = ('Unable to cd: path does not exist, user does not have'
                   ' permissions, or not a directory')
        cls = dbt.exceptions.WorkingDirectoryError
    else:
        message = 'Unknown error: {} (errno={}: "{}")'.format(
            str(exc), exc.errno, errno.errorcode.get(exc.errno, '<Unknown!>')
        )
    raise cls(cwd, cmd, message)


def _interpret_oserror(exc, cwd, cmd):
    """Interpret an OSError exc and raise the appropriate dbt exception.

    """
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # all of these functions raise unconditionally
    if os.name == 'nt':
        _handle_windows_error(exc, cwd, cmd)
    else:
        _handle_posix_error(exc, cwd, cmd)

    # this should not be reachable, raise _something_ at least!
    raise dbt.exceptions.InternalException(
        'Unhandled exception in _interpret_oserror: {}'.format(exc)
    )


def run_cmd(cwd, cmd, env=None):
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # the env argument replaces the environment entirely, which has exciting
    # consequences on Windows! Do an update instead.
    full_env = env
    if env is not None:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)

        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{}"'.format(out))
    logger.debug('STDERR: "{}"'.format(err))

    if proc.returncode != 0:
        logger.debug('command return code={}'.format(proc.returncode))
        raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
                                                out, err)

    return out, err
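
# Usage sketch for run_cmd (illustrative): extra environment variables are
# merged into a copy of os.environ rather than replacing it, so PATH and
# friends survive on Windows. The command and variable below are made up.
out, err = run_cmd('.', ['git', 'status'], env={'GIT_PAGER': 'cat'})
print(out.decode('utf-8'))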


def download(url, path):
    response = requests.get(url)
    with open(path, 'wb') as handle:
        for block in response.iter_content(1024 * 64):
            handle.write(block)


def rename(from_path, to_path, force=False):
    is_symlink = path_is_symlink(to_path)

    if os.path.exists(to_path) and force:
        if is_symlink:
            remove_file(to_path)
        else:
            rmdir(to_path)

    shutil.move(from_path, to_path)


def untar_package(tar_path, dest_dir, rename_to=None):
    tar_dir_name = None
    with tarfile.open(tar_path, 'r') as tarball:
        tarball.extractall(dest_dir)
        tar_dir_name = os.path.commonprefix(tarball.getnames())
    if rename_to:
        downloaded_path = os.path.join(dest_dir, tar_dir_name)
        desired_path = os.path.join(dest_dir, rename_to)
        dbt.clients.system.rename(downloaded_path, desired_path, force=True)


def chmod_and_retry(func, path, exc_info):
    """Define an error handler to pass to shutil.rmtree.
    On Windows, when a file is marked read-only as git likes to do, rmtree will
    fail. To handle that, on errors try to make the file writable.
    We want to retry most operations here, but listdir is one that we know will
    be useless.
    """
    if func is os.listdir or os.name != 'nt':
        raise
    os.chmod(path, stat.S_IREAD | stat.S_IWRITE)
    # on error, this will raise.
    func(path)


def _absnorm(path):
    return os.path.normcase(os.path.abspath(path))


def move(src, dst):
    """A re-implementation of shutil.move that properly removes the source
    directory on windows when it has read-only files in it and the move is
    between two drives.

    This is almost identical to the real shutil.move, except it uses our rmtree
    and skips handling non-windows OSes since the existing one works ok there.
    """
    if os.name != 'nt':
        return shutil.move(src, dst)

    if os.path.isdir(dst):
        if _absnorm(src) == _absnorm(dst):
            os.rename(src, dst)
            return

        dst = os.path.join(dst, os.path.basename(src.rstrip('/\\')))
        if os.path.exists(dst):
            raise EnvironmentError("Path '{}' already exists".format(dst))

    try:
        os.rename(src, dst)
    except OSError:
        # probably different drives
        if os.path.isdir(src):
            if _absnorm(dst + '\\').startswith(_absnorm(src + '\\')):
                # dst is inside src
                raise EnvironmentError(
                    "Cannot move a directory '{}' into itself '{}'"
                    .format(src, dst)
                )
            shutil.copytree(src, dst, symlinks=True)
            rmtree(src)
        else:
            shutil.copy2(src, dst)
            os.unlink(src)


def rmtree(path):
    """Recursively remove path. On permissions errors on windows, try to remove
    the read-only flag and try again.
    """
    return shutil.rmtree(path, onerror=chmod_and_retry)
core/dbt/compat.py (Normal file, 152 lines)
@@ -0,0 +1,152 @@
# flake8: noqa

import abc
import codecs
import warnings
import decimal

try:
    import cdecimal
except ImportError:
    DECIMALS = (decimal.Decimal,)
else:
    DECIMALS = (decimal.Decimal, cdecimal.Decimal)

WHICH_PYTHON = None

try:
    basestring
    WHICH_PYTHON = 2
except NameError:
    WHICH_PYTHON = 3

if WHICH_PYTHON == 2:
    basestring = basestring
    bigint = long
    NUMBERS = DECIMALS + (int, float, long)
    import __builtin__ as builtins
else:
    basestring = str
    bigint = int
    NUMBERS = DECIMALS + (int, float)
    import builtins

if WHICH_PYTHON == 2:
    from SimpleHTTPServer import SimpleHTTPRequestHandler
    from SocketServer import TCPServer
    from Queue import PriorityQueue, Empty as QueueEmpty
    from thread import get_ident
else:
    from http.server import SimpleHTTPRequestHandler
    from socketserver import TCPServer
    from queue import PriorityQueue, Empty as QueueEmpty
    from threading import get_ident


def to_unicode(s):
    if WHICH_PYTHON == 2:
        return unicode(s)
    else:
        return str(s)


def to_string(s):
    if WHICH_PYTHON == 2:
        if isinstance(s, unicode):
            return s
        elif isinstance(s, basestring):
            return to_unicode(s)
        else:
            return to_unicode(str(s))
    else:
        if isinstance(s, basestring):
            return s
        else:
            return str(s)


def to_native_string(s):
    if WHICH_PYTHON == 2:
        if isinstance(s, unicode):
            return str(s)
        elif isinstance(s, basestring):
            return s
        else:
            return str(s)
    else:
        if isinstance(s, basestring):
            return s
        else:
            return str(s)


def write_file(path, s):
    if WHICH_PYTHON == 2:
        open = codecs.open
    else:
        open = builtins.open
    with open(path, 'w', encoding='utf-8') as f:
        return f.write(to_string(s))


def open_file(path):
    """Open the path for reading. It must be utf-8 encoded."""
    if WHICH_PYTHON == 2:
        open = codecs.open
    else:
        open = builtins.open
    return open(path, encoding='utf-8')


if WHICH_PYTHON == 2:
    BOM_UTF8 = codecs.BOM_UTF8
else:
    BOM_UTF8 = codecs.BOM_UTF8.decode('utf-8')


def open_seed_file(path):
    if WHICH_PYTHON == 2:
        fp = open(path, 'Urb')
    else:
        fp = open(path, encoding='utf-8')
    return fp


if WHICH_PYTHON == 2:
    # In python 2, classmethod and staticmethod do not allow setters, so you
    # can't treat classmethods as first-class objects like you can regular
    # functions. This rarely matters, but for metaclass shenanigans on the
    # adapter we do want to set attributes on classmethods.
    class _classmethod(classmethod):
        pass

    classmethod = _classmethod

    # python 2.7 is missing this
    class abstractclassmethod(classmethod):
        __isabstractmethod__ = True

        def __init__(self, func):
            func.__isabstractmethod__ = True
            super(abstractclassmethod, self).__init__(func)

    class abstractstaticmethod(staticmethod):
        __isabstractmethod__ = True

        def __init__(self, func):
            func.__isabstractmethod__ = True
            super(abstractstaticmethod, self).__init__(func)

else:
    abstractclassmethod = abc.abstractclassmethod
    abstractstaticmethod = abc.abstractstaticmethod
    classmethod = classmethod


def suppress_warnings():
    # in python 2, ResourceWarnings don't exist.
    # in python 3, suppress ResourceWarnings about unclosed sockets, as the
    # bigquery library never closes them.
    if WHICH_PYTHON == 3:
        warnings.filterwarnings("ignore", category=ResourceWarning,
                                message="unclosed.*<socket.socket.*>")
@@ -1,15 +1,12 @@
import itertools
import os
import json
from collections import OrderedDict, defaultdict
import sqlparse
from collections import defaultdict

import dbt.utils
import dbt.include
import dbt.tracking

from dbt.utils import get_materialization, NodeType, is_type

from dbt.linker import Linker

import dbt.compat
@@ -19,33 +16,32 @@ import dbt.exceptions
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import CompiledNode, CompiledGraph
from dbt.contracts.graph.compiled import CompiledNode

from dbt.clients.system import write_json
from dbt.logger import GLOBAL_LOGGER as logger

graph_file_name = 'graph.gpickle'
manifest_file_name = 'manifest.json'


def print_compile_stats(stats):
    names = {
        NodeType.Model: 'models',
        NodeType.Test: 'tests',
        NodeType.Archive: 'archives',
        NodeType.Analysis: 'analyses',
        NodeType.Macro: 'macros',
        NodeType.Operation: 'operations',
        NodeType.Seed: 'seed files',
        NodeType.Model: 'model',
        NodeType.Test: 'test',
        NodeType.Snapshot: 'snapshot',
        NodeType.Analysis: 'analyse',
        NodeType.Macro: 'macro',
        NodeType.Operation: 'operation',
        NodeType.Seed: 'seed file',
        NodeType.Source: 'source',
    }

    results = {k: 0 for k in names.keys()}
    results.update(stats)

    stat_line = ", ".join(
        ["{} {}".format(ct, names.get(t)) for t, ct in results.items()])
        [dbt.utils.pluralize(ct, names.get(t)) for t, ct in results.items()])

    logger.info("Found {}".format(stat_line))
    logger.notice("Found {}".format(stat_line))


def _add_prepended_cte(prepended_ctes, new_cte):
@@ -72,8 +68,8 @@ def recursively_prepend_ctes(model, manifest):
        return (model, model.extra_ctes, manifest)

    if dbt.flags.STRICT_MODE:
        # ensure that all the nodes in this manifest are compiled
        CompiledGraph(**manifest.to_flat_graph())
        # ensure that the cte we're adding to is compiled
        CompiledNode(**model.serialize())

    prepended_ctes = []

@@ -82,7 +78,6 @@ def recursively_prepend_ctes(model, manifest):
        cte_to_add = manifest.nodes.get(cte_id)
        cte_to_add, new_prepended_ctes, manifest = recursively_prepend_ctes(
            cte_to_add, manifest)

        _extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
        new_cte_name = '__dbt__CTE__{}'.format(cte_to_add.get('name'))
        sql = ' {} as (\n{}\n)'.format(new_cte_name, cte_to_add.compiled_sql)
@@ -132,22 +127,23 @@ class Compiler(object):

        injected_node, _ = prepend_ctes(compiled_node, manifest)

        should_wrap = {NodeType.Test, NodeType.Analysis, NodeType.Operation}
        should_wrap = {NodeType.Test, NodeType.Operation}
        if injected_node.resource_type in should_wrap:
            # data tests get wrapped in count(*)
            # TODO : move this somewhere more reasonable
            if 'data' in injected_node.tags and \
               is_type(injected_node, NodeType.Test):
                injected_node.wrapped_sql = (
                    "select count(*) from (\n{test_sql}\n) sbq").format(
                    "select count(*) as errors "
                    "from (\n{test_sql}\n) sbq").format(
                        test_sql=injected_node.injected_sql)
            else:
                # don't wrap schema tests or analyses.
                injected_node.wrapped_sql = injected_node.injected_sql

        elif is_type(injected_node, NodeType.Archive):
        elif is_type(injected_node, NodeType.Snapshot):
            # unfortunately we do everything automagically for
            # archives. in the future it'd be nice to generate
            # snapshots. in the future it'd be nice to generate
            # the SQL at the parser level.
            pass

@@ -160,33 +156,19 @@ class Compiler(object):

        return injected_node

    def write_manifest_file(self, manifest):
        """Write the manifest file to disk.

        manifest should be a Manifest.
        """
        filename = manifest_file_name
        manifest_path = os.path.join(self.config.target_path, filename)
        write_json(manifest_path, manifest.serialize())

    def write_graph_file(self, linker):
    def write_graph_file(self, linker, manifest):
        filename = graph_file_name
        graph_path = os.path.join(self.config.target_path, filename)
        linker.write_graph(graph_path)
        linker.write_graph(graph_path, manifest)

    def link_node(self, linker, node, manifest):
        linker.add_node(node.unique_id)

        linker.update_node_data(
            node.unique_id,
            node.to_dict())

        for dependency in node.depends_on_nodes:
            if manifest.nodes.get(dependency):
                linker.dependency(
                    node.unique_id,
                    (manifest.nodes.get(dependency).unique_id))

            else:
                dbt.exceptions.dependency_not_found(node, dependency)

@@ -199,58 +181,9 @@ class Compiler(object):
        if cycle:
            raise RuntimeError("Found a cycle: {}".format(cycle))

    def get_all_projects(self):
        all_projects = {self.config.project_name: self.config}
        dependency_projects = dbt.utils.dependency_projects(self.config)

        for project_cfg in dependency_projects:
            name = project_cfg.project_name
            all_projects[name] = project_cfg

        if dbt.flags.STRICT_MODE:
            dbt.contracts.project.ProjectList(**all_projects)

        return all_projects

    def _check_resource_uniqueness(cls, manifest):
        names_resources = {}
        alias_resources = {}

        for resource, node in manifest.nodes.items():
            if node.resource_type not in NodeType.refable():
                continue

            name = node.name
            alias = "{}.{}".format(node.schema, node.alias)

            existing_node = names_resources.get(name)
            if existing_node is not None:
                dbt.exceptions.raise_duplicate_resource_name(
                    existing_node, node)

            existing_alias = alias_resources.get(alias)
            if existing_alias is not None:
                dbt.exceptions.raise_ambiguous_alias(
                    existing_alias, node)

            names_resources[name] = node
            alias_resources[alias] = node

    def compile(self):
    def compile(self, manifest, write=True):
        linker = Linker()

        all_projects = self.get_all_projects()

        manifest = dbt.loader.GraphLoader.load_all(self.config, all_projects)

        self.write_manifest_file(manifest)

        self._check_resource_uniqueness(manifest)

        resource_fqns = manifest.get_resource_fqns()
        self.config.warn_for_unused_resource_config_paths(resource_fqns,
                                                          manifest.disabled)

        self.link_graph(linker, manifest)

        stats = defaultdict(int)

@@ -260,7 +193,60 @@ class Compiler(object):
                manifest.macros.items()):
            stats[node.resource_type] += 1

        self.write_graph_file(linker)
        if write:
            self.write_graph_file(linker, manifest)
        print_compile_stats(stats)

        return manifest, linker
        return linker


def compile_manifest(config, manifest, write=True):
    compiler = Compiler(config)
    compiler.initialize()
    return compiler.compile(manifest, write=write)


def _is_writable(node):
    if not node.injected_sql:
        return False

    if dbt.utils.is_type(node, NodeType.Snapshot):
        return False

    return True


def compile_node(adapter, config, node, manifest, extra_context, write=True):
    compiler = Compiler(config)
    node = compiler.compile_node(node, manifest, extra_context)
    node = _inject_runtime_config(adapter, node, extra_context)

    if write and _is_writable(node):
        logger.debug('Writing injected SQL for node "{}"'.format(
            node.unique_id))

        written_path = dbt.writer.write_node(
            node,
            config.target_path,
            'compiled',
            node.injected_sql)

        node.build_path = written_path

    return node


def _inject_runtime_config(adapter, node, extra_context):
    wrapped_sql = node.wrapped_sql
    context = _node_context(adapter, node)
    context.update(extra_context)
    sql = dbt.clients.jinja.get_rendered(wrapped_sql, context)
    node.wrapped_sql = sql
    return node


def _node_context(adapter, node):
    return {
        "run_started_at": dbt.tracking.active_user.run_started_at,
        "invocation_id": dbt.tracking.active_user.invocation_id,
    }
core/dbt/config/__init__.py (Normal file, 5 lines)
@@ -0,0 +1,5 @@
# all these are just exports, they need "noqa" so flake8 will not complain.
from .renderer import ConfigRenderer  # noqa
from .profile import Profile, UserConfig, PROFILES_DIR  # noqa
from .project import Project  # noqa
from .runtime import RuntimeConfig  # noqa
core/dbt/config/profile.py (Normal file, 382 lines)
@@ -0,0 +1,382 @@
import os

from dbt.adapters.factory import load_plugin
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.project import ProfileConfig
from dbt.exceptions import DbtProfileError
from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.exceptions import RuntimeException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import parse_cli_vars
from dbt import tracking
from dbt.ui import printer

from .renderer import ConfigRenderer

DEFAULT_THREADS = 1
DEFAULT_SEND_ANONYMOUS_USAGE_STATS = True
DEFAULT_USE_COLORS = True
DEFAULT_PROFILES_DIR = os.path.join(os.path.expanduser('~'), '.dbt')
PROFILES_DIR = os.path.expanduser(
    os.environ.get('DBT_PROFILES_DIR', DEFAULT_PROFILES_DIR)
)

INVALID_PROFILE_MESSAGE = """
dbt encountered an error while trying to read your profiles.yml file.

{error_string}
"""


NO_SUPPLIED_PROFILE_ERROR = """\
dbt cannot run because no profile was specified for this dbt project.
To specify a profile for this project, add a line like this to
your dbt_project.yml file:

profile: [profile name]

Here, [profile name] should be replaced with a profile name
defined in your profiles.yml file. You can find profiles.yml here:

{profiles_file}/profiles.yml
""".format(profiles_file=PROFILES_DIR)


def read_profile(profiles_dir):
    path = os.path.join(profiles_dir, 'profiles.yml')

    contents = None
    if os.path.isfile(path):
        try:
            contents = load_file_contents(path, strip=False)
            return load_yaml_text(contents)
        except ValidationException as e:
            msg = INVALID_PROFILE_MESSAGE.format(error_string=e)
            raise ValidationException(msg)

    return {}


class UserConfig(object):
    def __init__(self, send_anonymous_usage_stats, use_colors, printer_width):
        self.send_anonymous_usage_stats = send_anonymous_usage_stats
        self.use_colors = use_colors
        self.printer_width = printer_width

    @classmethod
    def from_dict(cls, cfg=None):
        if cfg is None:
            cfg = {}
        send_anonymous_usage_stats = cfg.get(
            'send_anonymous_usage_stats',
            DEFAULT_SEND_ANONYMOUS_USAGE_STATS
        )
        use_colors = cfg.get(
            'use_colors',
            DEFAULT_USE_COLORS
        )
        printer_width = cfg.get(
            'printer_width'
        )
        return cls(send_anonymous_usage_stats, use_colors, printer_width)
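
# Sketch of UserConfig.from_dict (illustrative): keys that are missing from
# the 'config' block fall back to the defaults defined above.
cfg = UserConfig.from_dict({'use_colors': False})
assert cfg.use_colors is False
assert cfg.send_anonymous_usage_stats is DEFAULT_SEND_ANONYMOUS_USAGE_STATS
assert cfg.printer_width is None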

    def to_dict(self):
        return {
            'send_anonymous_usage_stats': self.send_anonymous_usage_stats,
            'use_colors': self.use_colors,
        }

    @classmethod
    def from_directory(cls, directory):
        user_cfg = None
        profile = read_profile(directory)
        if profile:
            user_cfg = profile.get('config', {})
        return cls.from_dict(user_cfg)

    def set_values(self, cookie_dir):
        if self.send_anonymous_usage_stats:
            tracking.initialize_tracking(cookie_dir)
        else:
            tracking.do_not_track()

        if self.use_colors:
            printer.use_colors()

        if self.printer_width:
            printer.printer_width(self.printer_width)


class Profile(object):
    def __init__(self, profile_name, target_name, config, threads,
                 credentials):
        self.profile_name = profile_name
        self.target_name = target_name
        if isinstance(config, dict):
            config = UserConfig.from_dict(config)
        self.config = config
        self.threads = threads
        self.credentials = credentials

    def to_profile_info(self, serialize_credentials=False):
        """Unlike to_project_config, this dict is not a mirror of any existing
        on-disk data structure. It's used when creating a new profile from an
        existing one.

        :param serialize_credentials bool: If True, serialize the credentials.
            Otherwise, the Credentials object will be copied.
        :returns dict: The serialized profile.
        """
        result = {
            'profile_name': self.profile_name,
            'target_name': self.target_name,
            'config': self.config.to_dict(),
            'threads': self.threads,
            'credentials': self.credentials.incorporate(),
        }
        if serialize_credentials:
            result['credentials'] = result['credentials'].serialize()
        return result

    def __str__(self):
        return str(self.to_profile_info())

    def __eq__(self, other):
        if not (isinstance(other, self.__class__) and
                isinstance(self, other.__class__)):
            return False
        return self.to_profile_info() == other.to_profile_info()

    def validate(self):
        if self.credentials:
            self.credentials.validate()
        try:
            ProfileConfig(**self.to_profile_info(serialize_credentials=True))
        except ValidationException as exc:
            raise DbtProfileError(str(exc))

    @staticmethod
    def _credentials_from_profile(profile, profile_name, target_name):
        # credentials carry their 'type' in their actual type, not their
        # attributes. We do want this in order to pick our Credentials class.
        if 'type' not in profile:
            raise DbtProfileError(
                'required field "type" not found in profile {} and target {}'
                .format(profile_name, target_name))

        typename = profile.pop('type')

        try:
            cls = load_plugin(typename)
            credentials = cls(**profile)
        except RuntimeException as e:
            raise DbtProfileError(
                'Credentials in profile "{}", target "{}" invalid: {}'
                .format(profile_name, target_name, str(e))
            )
        return credentials

    @staticmethod
    def pick_profile_name(args_profile_name, project_profile_name=None):
        profile_name = project_profile_name
        if args_profile_name is not None:
            profile_name = args_profile_name
        if profile_name is None:
            raise DbtProjectError(NO_SUPPLIED_PROFILE_ERROR)
        return profile_name

    @staticmethod
    def _get_profile_data(profile, profile_name, target_name):
        if 'outputs' not in profile:
            raise DbtProfileError(
                "outputs not specified in profile '{}'".format(profile_name)
            )
        outputs = profile['outputs']

        if target_name not in outputs:
            outputs = '\n'.join(' - {}'.format(output)
                                for output in outputs)
            msg = ("The profile '{}' does not have a target named '{}'. The "
                   "valid target names for this profile are:\n{}"
                   .format(profile_name, target_name, outputs))
            raise DbtProfileError(msg, result_type='invalid_target')
        profile_data = outputs[target_name]
        return profile_data

    @classmethod
    def from_credentials(cls, credentials, threads, profile_name, target_name,
                         user_cfg=None):
        """Create a profile from an existing set of Credentials and the
        remaining information.

        :param credentials dict: The credentials dict for this profile.
        :param threads int: The number of threads to use for connections.
        :param profile_name str: The profile name used for this profile.
        :param target_name str: The target name used for this profile.
        :param user_cfg Optional[dict]: The user-level config block from the
            raw profiles, if specified.
        :raises DbtProfileError: If the profile is invalid.
        :returns Profile: The new Profile object.
        """
        config = UserConfig.from_dict(user_cfg)
        profile = cls(
            profile_name=profile_name,
            target_name=target_name,
            config=config,
            threads=threads,
            credentials=credentials
        )
        profile.validate()
        return profile

    @classmethod
    def render_profile(cls, raw_profile, profile_name, target_override,
                       cli_vars):
        """This is a containment zone for the hateful way we're rendering
        profiles.
        """
        renderer = ConfigRenderer(cli_vars=cli_vars)

        # rendering profiles is a bit complex. Two constraints cause trouble:
        # 1) users should be able to use environment/cli variables to specify
        #    the target in their profile.
        # 2) Missing environment/cli variables in profiles/targets that don't
        #    end up getting selected should not cause errors.
        # so first we'll just render the target name, then we use that rendered
        # name to extract a profile that we can render.
        if target_override is not None:
            target_name = target_override
        elif 'target' in raw_profile:
            # render the target if it was parsed from yaml
            target_name = renderer.render_value(raw_profile['target'])
        else:
            target_name = 'default'
            logger.debug(
                "target not specified in profile '{}', using '{}'"
                .format(profile_name, target_name)
            )

        raw_profile_data = cls._get_profile_data(
            raw_profile, profile_name, target_name
        )

        profile_data = renderer.render_profile_data(raw_profile_data)
        return target_name, profile_data
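
# Sketch of the two-pass rendering in render_profile (illustrative): the
# 'target' key is rendered first so an environment variable can pick the
# output, and only the selected output is rendered afterwards. The profile
# dict below is made up, and it assumes env_var() is available in the config
# rendering context.
raw_profile = {
    'target': "{{ env_var('DBT_TARGET') }}",
    'outputs': {
        'dev': {'type': 'postgres', 'host': "{{ env_var('DEV_HOST') }}"},
        'prod': {'type': 'postgres', 'host': "{{ env_var('PROD_HOST') }}"},
    },
}
# With DBT_TARGET=dev set, only DEV_HOST needs to be defined:
target_name, profile_data = Profile.render_profile(
    raw_profile, 'my_profile', target_override=None, cli_vars={}
)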

    @classmethod
    def from_raw_profile_info(cls, raw_profile, profile_name, cli_vars,
                              user_cfg=None, target_override=None,
                              threads_override=None):
        """Create a profile from its raw profile information.

        (this is an intermediate step, mostly useful for unit testing)

        :param raw_profile dict: The profile data for a single profile, from
            disk as yaml and its values rendered with jinja.
        :param profile_name str: The profile name used.
        :param cli_vars dict: The command-line variables passed as arguments,
            as a dict.
        :param user_cfg Optional[dict]: The global config for the user, if it
            was present.
        :param target_override Optional[str]: The target to use, if provided on
            the command line.
        :param threads_override Optional[str]: The thread count to use, if
            provided on the command line.
        :raises DbtProfileError: If the profile is invalid or missing, or the
            target could not be found
        :returns Profile: The new Profile object.
        """
        # user_cfg is not rendered since it only contains booleans.
        # TODO: should it be, and the values coerced to bool?
        target_name, profile_data = cls.render_profile(
            raw_profile, profile_name, target_override, cli_vars
        )

        # valid connections never include the number of threads, but it's
        # stored on a per-connection level in the raw configs
        threads = profile_data.pop('threads', DEFAULT_THREADS)
        if threads_override is not None:
            threads = threads_override

        credentials = cls._credentials_from_profile(
            profile_data, profile_name, target_name
        )

        return cls.from_credentials(
            credentials=credentials,
            profile_name=profile_name,
            target_name=target_name,
            threads=threads,
            user_cfg=user_cfg
        )

    @classmethod
    def from_raw_profiles(cls, raw_profiles, profile_name, cli_vars,
                          target_override=None, threads_override=None):
        """
        :param raw_profiles dict: The profile data, from disk as yaml.
        :param profile_name str: The profile name to use.
        :param cli_vars dict: The command-line variables passed as arguments,
            as a dict.
        :param target_override Optional[str]: The target to use, if provided on
            the command line.
        :param threads_override Optional[str]: The thread count to use, if
            provided on the command line.
        :raises DbtProjectError: If there is no profile name specified in the
            project or the command line arguments
        :raises DbtProfileError: If the profile is invalid or missing, or the
            target could not be found
        :returns Profile: The new Profile object.
        """
        if profile_name not in raw_profiles:
            raise DbtProjectError(
                "Could not find profile named '{}'".format(profile_name)
            )

        # First, we've already got our final decision on profile name, and we
        # don't render keys, so we can pluck that out
        raw_profile = raw_profiles[profile_name]

        user_cfg = raw_profiles.get('config')

        return cls.from_raw_profile_info(
            raw_profile=raw_profile,
            profile_name=profile_name,
            cli_vars=cli_vars,
            user_cfg=user_cfg,
            target_override=target_override,
            threads_override=threads_override,
        )

    @classmethod
    def from_args(cls, args, project_profile_name=None):
        """Given the raw profiles as read from disk and the name of the desired
        profile if specified, return the profile component of the runtime
        config.

        :param args argparse.Namespace: The arguments as parsed from the cli.
        :param project_profile_name Optional[str]: The profile name, if
            specified in a project.
        :raises DbtProjectError: If there is no profile name specified in the
            project or the command line arguments, or if the specified profile
            is not found
        :raises DbtProfileError: If the profile is invalid or missing, or the
            target could not be found.
        :returns Profile: The new Profile object.
        """
        cli_vars = parse_cli_vars(getattr(args, 'vars', '{}'))
        threads_override = getattr(args, 'threads', None)
        target_override = getattr(args, 'target', None)
        raw_profiles = read_profile(args.profiles_dir)
        profile_name = cls.pick_profile_name(args.profile,
                                             project_profile_name)

        return cls.from_raw_profiles(
            raw_profiles=raw_profiles,
            profile_name=profile_name,
            cli_vars=cli_vars,
            target_override=target_override,
            threads_override=threads_override
        )
core/dbt/config/project.py (Normal file, 449 lines)
@@ -0,0 +1,449 @@
from copy import deepcopy
import hashlib
import os

from dbt import compat
from dbt.clients.system import resolve_path_from_base
from dbt.clients.system import path_exists
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.exceptions import DbtProjectError
from dbt.exceptions import RecursionException
from dbt.exceptions import SemverException
from dbt.exceptions import ValidationException
from dbt.exceptions import warn_or_error
from dbt.semver import VersionSpecifier
from dbt.semver import versions_compatible
from dbt.version import get_installed_version
from dbt.ui import printer
from dbt.utils import deep_map
from dbt.utils import parse_cli_vars
from dbt.parser.source_config import SourceConfig

from dbt.contracts.project import Project as ProjectContract
from dbt.contracts.project import PackageConfig

from .renderer import ConfigRenderer


UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE = """\
WARNING: Configuration paths exist in your dbt_project.yml file which do not \
apply to any resources.
There are {} unused configuration paths:\n{}
"""


INVALID_VERSION_ERROR = """\
This version of dbt is not supported with the '{package}' package.
Installed version of dbt: {installed}
Required version of dbt for '{package}': {version_spec}
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""


IMPOSSIBLE_VERSION_ERROR = """\
The package version requirement can never be satisfied for the '{package}'
package.
Required versions of dbt for '{package}': {version_spec}
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""


def _list_if_none(value):
    if value is None:
        value = []
    return value


def _dict_if_none(value):
    if value is None:
        value = {}
    return value


def _list_if_none_or_string(value):
    value = _list_if_none(value)
    if isinstance(value, compat.basestring):
        return [value]
    return value


def _load_yaml(path):
    contents = load_file_contents(path)
    return load_yaml_text(contents)


def _get_config_paths(config, path=(), paths=None):
    if paths is None:
        paths = set()

    for key, value in config.items():
        if isinstance(value, dict):
            if key in SourceConfig.ConfigKeys:
                if path not in paths:
                    paths.add(path)
            else:
                _get_config_paths(value, path + (key,), paths)
        else:
            if path not in paths:
                paths.add(path)

    return frozenset(paths)
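
# Sketch of _get_config_paths (illustrative): keys are followed until a
# config key or a non-dict value is hit, yielding the configured key paths.
example = {'my_project': {'staging': {'materialized': 'view'}}}
assert _get_config_paths(example) == frozenset({('my_project', 'staging')})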


def _is_config_used(path, fqns):
    if fqns:
        for fqn in fqns:
            if len(path) <= len(fqn) and fqn[:len(path)] == path:
                return True
    return False


def package_data_from_root(project_root):
    package_filepath = resolve_path_from_base(
        'packages.yml', project_root
    )

    if path_exists(package_filepath):
        packages_dict = _load_yaml(package_filepath)
    else:
        packages_dict = None
    return packages_dict


def package_config_from_data(packages_data):
    if packages_data is None:
        packages_data = {'packages': []}

    try:
        packages = PackageConfig(**packages_data)
    except ValidationException as e:
        raise DbtProjectError('Invalid package config: {}'.format(str(e)))
    return packages


def _parse_versions(versions):
    """Parse multiple versions as read from disk. The versions value may be any
    one of:
        - a single version string ('>0.12.1')
        - a single string specifying multiple comma-separated versions
            ('>0.11.1,<=0.12.2')
        - an array of single-version strings (['>0.11.1', '<=0.12.2'])

    Regardless, this will return a list of VersionSpecifiers
    """
    if isinstance(versions, compat.basestring):
        versions = versions.split(',')
    return [VersionSpecifier.from_version_string(v) for v in versions]
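
# Sketch of the accepted forms for require-dbt-version (illustrative); the
# first two calls should each yield a list of two VersionSpecifier objects.
_parse_versions('>=0.11.1,<=0.12.2')
_parse_versions(['>=0.11.1', '<=0.12.2'])
specs = _parse_versions('>=0.11.1')   # a single specifier also works
assert len(specs) == 1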
|
||||
|
||||
|
||||
class Project(object):
|
||||
def __init__(self, project_name, version, project_root, profile_name,
|
||||
source_paths, macro_paths, data_paths, test_paths,
|
||||
analysis_paths, docs_paths, target_path, snapshot_paths,
|
||||
clean_targets, log_path, modules_path, quoting, models,
|
||||
on_run_start, on_run_end, archive, seeds, dbt_version,
|
||||
packages):
|
||||
self.project_name = project_name
|
||||
self.version = version
|
||||
self.project_root = project_root
|
||||
self.profile_name = profile_name
|
||||
self.source_paths = source_paths
|
||||
self.macro_paths = macro_paths
|
||||
self.data_paths = data_paths
|
||||
self.test_paths = test_paths
|
||||
self.analysis_paths = analysis_paths
|
||||
self.docs_paths = docs_paths
|
||||
self.target_path = target_path
|
||||
self.snapshot_paths = snapshot_paths
|
||||
self.clean_targets = clean_targets
|
||||
self.log_path = log_path
|
||||
self.modules_path = modules_path
|
||||
self.quoting = quoting
|
||||
self.models = models
|
||||
self.on_run_start = on_run_start
|
||||
self.on_run_end = on_run_end
|
||||
self.archive = archive
|
||||
self.seeds = seeds
|
||||
self.dbt_version = dbt_version
|
||||
self.packages = packages
|
||||
|
||||
@staticmethod
|
||||
def _preprocess(project_dict):
|
||||
"""Pre-process certain special keys to convert them from None values
|
||||
into empty containers, and to turn strings into arrays of strings.
|
||||
"""
|
||||
handlers = {
|
||||
('archive',): _list_if_none,
|
||||
('on-run-start',): _list_if_none_or_string,
|
||||
('on-run-end',): _list_if_none_or_string,
|
||||
}
|
||||
|
||||
for k in ('models', 'seeds'):
|
||||
handlers[(k,)] = _dict_if_none
|
||||
handlers[(k, 'vars')] = _dict_if_none
|
||||
handlers[(k, 'pre-hook')] = _list_if_none_or_string
|
||||
handlers[(k, 'post-hook')] = _list_if_none_or_string
|
||||
handlers[('seeds', 'column_types')] = _dict_if_none
|
||||
|
||||
def converter(value, keypath):
|
||||
if keypath in handlers:
|
||||
handler = handlers[keypath]
|
||||
return handler(value)
|
||||
else:
|
||||
return value
|
||||
|
||||
return deep_map(converter, project_dict)
|
||||
|
||||
@classmethod
|
||||
def from_project_config(cls, project_dict, packages_dict=None):
|
||||
"""Create a project from its project and package configuration, as read
|
||||
by yaml.safe_load().
|
||||
|
||||
:param project_dict dict: The dictionary as read from disk
|
||||
:param packages_dict Optional[dict]: If it exists, the packages file as
|
||||
read from disk.
|
||||
:raises DbtProjectError: If the project is missing or invalid, or if
|
||||
the packages file exists and is invalid.
|
||||
:returns Project: The project, with defaults populated.
|
||||
"""
|
||||
try:
|
||||
project_dict = cls._preprocess(project_dict)
|
||||
except RecursionException:
|
||||
raise DbtProjectError(
|
||||
'Cycle detected: Project input has a reference to itself',
|
||||
project=project_dict
|
||||
)
|
||||
# just for validation.
|
||||
try:
|
||||
ProjectContract(**project_dict)
|
||||
except ValidationException as e:
|
||||
raise DbtProjectError(str(e))
|
||||
|
||||
# name/version are required in the Project definition, so we can assume
|
||||
# they are present
|
||||
name = project_dict['name']
|
||||
version = project_dict['version']
|
||||
# this is added at project_dict parse time and should always be here
|
||||
# once we see it.
|
||||
project_root = project_dict['project-root']
|
||||
# this is only optional in the sense that if it's not present, it needs
|
||||
# to have been a cli argument.
|
||||
profile_name = project_dict.get('profile')
|
||||
# these are optional
|
||||
source_paths = project_dict.get('source-paths', ['models'])
|
||||
macro_paths = project_dict.get('macro-paths', ['macros'])
|
||||
data_paths = project_dict.get('data-paths', ['data'])
|
||||
test_paths = project_dict.get('test-paths', ['test'])
|
||||
analysis_paths = project_dict.get('analysis-paths', [])
|
||||
docs_paths = project_dict.get('docs-paths', source_paths[:])
|
||||
target_path = project_dict.get('target-path', 'target')
|
||||
snapshot_paths = project_dict.get('snapshot-paths', ['snapshots'])
|
||||
# should this also include the modules path by default?
|
||||
clean_targets = project_dict.get('clean-targets', [target_path])
|
||||
log_path = project_dict.get('log-path', 'logs')
|
||||
modules_path = project_dict.get('modules-path', 'dbt_modules')
|
||||
# in the default case we'll populate this once we know the adapter type
|
||||
quoting = project_dict.get('quoting', {})
|
||||
|
||||
models = project_dict.get('models', {})
|
||||
on_run_start = project_dict.get('on-run-start', [])
|
||||
on_run_end = project_dict.get('on-run-end', [])
|
||||
archive = project_dict.get('archive', [])
|
||||
seeds = project_dict.get('seeds', {})
|
||||
dbt_raw_version = project_dict.get('require-dbt-version', '>=0.0.0')
|
||||
|
||||
try:
|
||||
dbt_version = _parse_versions(dbt_raw_version)
|
||||
except SemverException as e:
|
||||
raise DbtProjectError(str(e))
|
||||
|
||||
packages = package_config_from_data(packages_dict)
|
||||
|
||||
project = cls(
|
||||
project_name=name,
|
||||
version=version,
|
||||
project_root=project_root,
|
||||
profile_name=profile_name,
|
||||
source_paths=source_paths,
|
||||
macro_paths=macro_paths,
|
||||
data_paths=data_paths,
|
||||
test_paths=test_paths,
|
||||
analysis_paths=analysis_paths,
|
||||
docs_paths=docs_paths,
|
||||
target_path=target_path,
|
||||
snapshot_paths=snapshot_paths,
|
||||
clean_targets=clean_targets,
|
||||
log_path=log_path,
|
||||
modules_path=modules_path,
|
||||
quoting=quoting,
|
||||
models=models,
|
||||
on_run_start=on_run_start,
|
||||
on_run_end=on_run_end,
|
||||
archive=archive,
|
||||
seeds=seeds,
|
||||
dbt_version=dbt_version,
|
||||
packages=packages
|
||||
)
|
||||
# sanity check - this means an internal issue
|
||||
project.validate()
|
||||
return project
|
||||
|
||||
def __str__(self):
|
||||
cfg = self.to_project_config(with_packages=True)
|
||||
return str(cfg)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not (isinstance(other, self.__class__) and
|
||||
isinstance(self, other.__class__)):
|
||||
return False
|
||||
return self.to_project_config(with_packages=True) == \
|
||||
other.to_project_config(with_packages=True)
|
||||
|
||||
def to_project_config(self, with_packages=False):
|
||||
"""Return a dict representation of the config that could be written to
|
||||
disk with `yaml.safe_dump` to get this configuration.
|
||||
|
||||
:param with_packages bool: If True, include the serialized packages
|
||||
file in the root.
|
||||
:returns dict: The serialized profile.
|
||||
"""
|
||||
result = deepcopy({
|
||||
'name': self.project_name,
|
||||
'version': self.version,
|
||||
'project-root': self.project_root,
|
||||
'profile': self.profile_name,
|
||||
'source-paths': self.source_paths,
|
||||
'macro-paths': self.macro_paths,
|
||||
'data-paths': self.data_paths,
|
||||
'test-paths': self.test_paths,
|
||||
'analysis-paths': self.analysis_paths,
|
||||
'docs-paths': self.docs_paths,
|
||||
'target-path': self.target_path,
|
||||
'snapshot-paths': self.snapshot_paths,
|
||||
'clean-targets': self.clean_targets,
|
||||
'log-path': self.log_path,
|
||||
'quoting': self.quoting,
|
||||
'models': self.models,
|
||||
'on-run-start': self.on_run_start,
|
||||
'on-run-end': self.on_run_end,
|
||||
'archive': self.archive,
|
||||
'seeds': self.seeds,
|
||||
'require-dbt-version': [
|
||||
v.to_version_string() for v in self.dbt_version
|
||||
],
|
||||
})
|
||||
if with_packages:
|
||||
result.update(self.packages.serialize())
|
||||
return result
|
||||
|
||||
def validate(self):
|
||||
try:
|
||||
ProjectContract(**self.to_project_config())
|
||||
except ValidationException as exc:
|
||||
raise DbtProjectError(str(exc))
|
||||
|
||||
@classmethod
|
||||
def from_project_root(cls, project_root, cli_vars):
|
||||
"""Create a project from a root directory. Reads in dbt_project.yml and
|
||||
packages.yml, if it exists.
|
||||
|
||||
:param project_root str: The path to the project root to load.
|
||||
:raises DbtProjectError: If the project is missing or invalid, or if
|
||||
the packages file exists and is invalid.
|
||||
:returns Project: The project, with defaults populated.
|
||||
"""
|
||||
project_root = os.path.normpath(project_root)
|
||||
project_yaml_filepath = os.path.join(project_root, 'dbt_project.yml')
|
||||
|
||||
# get the project.yml contents
|
||||
if not path_exists(project_yaml_filepath):
|
||||
raise DbtProjectError(
|
||||
'no dbt_project.yml found at expected path {}'
|
||||
.format(project_yaml_filepath)
|
||||
)
|
||||
|
||||
if isinstance(cli_vars, compat.basestring):
|
||||
cli_vars = parse_cli_vars(cli_vars)
|
||||
renderer = ConfigRenderer(cli_vars)
|
||||
|
||||
project_dict = _load_yaml(project_yaml_filepath)
|
||||
rendered_project = renderer.render_project(project_dict)
|
||||
rendered_project['project-root'] = project_root
|
||||
packages_dict = package_data_from_root(project_root)
|
||||
return cls.from_project_config(rendered_project, packages_dict)
|
||||
|
||||
@classmethod
|
||||
def from_current_directory(cls, cli_vars):
|
||||
return cls.from_project_root(os.getcwd(), cli_vars)
|
||||
|
||||
@classmethod
|
||||
def from_args(cls, args):
|
||||
return cls.from_current_directory(getattr(args, 'vars', '{}'))
|
||||
|
||||
def hashed_name(self):
|
||||
return hashlib.md5(self.project_name.encode('utf-8')).hexdigest()
|
||||
|
||||
def get_resource_config_paths(self):
|
||||
"""Return a dictionary with 'seeds' and 'models' keys whose values are
|
||||
lists of lists of strings, where each inner list of strings represents
|
||||
a configured path in the resource.
|
||||
"""
|
||||
return {
|
||||
'models': _get_config_paths(self.models),
|
||||
'seeds': _get_config_paths(self.seeds),
|
||||
}
|
||||
|
||||
def get_unused_resource_config_paths(self, resource_fqns, disabled):
|
||||
"""Return a list of lists of strings, where each inner list of strings
|
||||
represents a type + FQN path of a resource configuration that is not
|
||||
used.
|
||||
"""
|
||||
disabled_fqns = frozenset(tuple(fqn) for fqn in disabled)
|
||||
resource_config_paths = self.get_resource_config_paths()
|
||||
unused_resource_config_paths = []
|
||||
for resource_type, config_paths in resource_config_paths.items():
|
||||
used_fqns = resource_fqns.get(resource_type, frozenset())
|
||||
fqns = used_fqns | disabled_fqns
|
||||
|
||||
for config_path in config_paths:
|
||||
if not _is_config_used(config_path, fqns):
|
||||
unused_resource_config_paths.append(
|
||||
(resource_type,) + config_path
|
||||
)
|
||||
return unused_resource_config_paths
|
||||
|
||||
def warn_for_unused_resource_config_paths(self, resource_fqns, disabled):
|
||||
unused = self.get_unused_resource_config_paths(resource_fqns, disabled)
|
||||
if len(unused) == 0:
|
||||
return
|
||||
|
||||
msg = UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE.format(
|
||||
len(unused),
|
||||
'\n'.join('- {}'.format('.'.join(u)) for u in unused)
|
||||
)
|
||||
warn_or_error(msg, log_fmt=printer.yellow('{}'))
|
||||
|
||||
def validate_version(self):
|
||||
"""Ensure this package works with the installed version of dbt."""
|
||||
installed = get_installed_version()
|
||||
if not versions_compatible(*self.dbt_version):
|
||||
msg = IMPOSSIBLE_VERSION_ERROR.format(
|
||||
package=self.project_name,
|
||||
version_spec=[
|
||||
x.to_version_string() for x in self.dbt_version
|
||||
]
|
||||
)
|
||||
raise DbtProjectError(msg)
|
||||
|
||||
if not versions_compatible(installed, *self.dbt_version):
|
||||
msg = INVALID_VERSION_ERROR.format(
|
||||
package=self.project_name,
|
||||
installed=installed.to_version_string(),
|
||||
version_spec=[
|
||||
x.to_version_string() for x in self.dbt_version
|
||||
]
|
||||
)
|
||||
raise DbtProjectError(msg)
|
||||
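validate_version relies on dbt's own versions module (get_installed_version, versions_compatible), whose internals are not part of this diff. For intuition only, the second check — "does the installed dbt satisfy the package's required range?" — can be approximated with the third-party packaging library; the spec string and version below are made up for the example:

from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Hypothetical require-dbt-version range and installed dbt version.
version_spec = SpecifierSet(">=0.13.0,<0.15.0")
installed = Version("0.14.1")

if installed not in version_spec:
    raise RuntimeError(
        "installed dbt {} does not satisfy {}".format(installed, version_spec)
    )
print("version check passed")

The first check in validate_version (rejecting a self-contradictory spec outright) has no direct packaging equivalent and is omitted here.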
core/dbt/config/renderer.py (new file, 111 lines)
@@ -0,0 +1,111 @@
from dbt import compat
from dbt.clients.jinja import get_rendered
from dbt.context.common import generate_config_context
from dbt.exceptions import DbtProfileError
from dbt.exceptions import DbtProjectError
from dbt.exceptions import RecursionException
from dbt.utils import deep_map


class ConfigRenderer(object):
    """A renderer provides configuration rendering for a given set of cli
    variables and a render type.
    """
    def __init__(self, cli_vars):
        self.context = generate_config_context(cli_vars)

    @staticmethod
    def _is_hook_or_model_vars_path(keypath):
        if not keypath:
            return False

        first = keypath[0]
        # run hooks
        if first in {'on-run-start', 'on-run-end'}:
            return True
        # models have two things to avoid
        if first in {'seeds', 'models'}:
            # model-level hooks
            if 'pre-hook' in keypath or 'post-hook' in keypath:
                return True
            # model-level 'vars' declarations
            if 'vars' in keypath:
                return True

        return False

    def _render_project_entry(self, value, keypath):
        """Render an entry, in case it's jinja. This is meant to be passed to
        deep_map.

        If the parsed entry is a string and has the name 'port', this will
        attempt to cast it to an int, and on failure will return the parsed
        string.

        :param value Any: The value to potentially render
        :param key str: The key to convert on.
        :return Any: The rendered entry.
        """
        # hooks should be treated as raw sql, they'll get rendered later.
        # Same goes for 'vars' declarations inside 'models'/'seeds'.
        if self._is_hook_or_model_vars_path(keypath):
            return value

        return self.render_value(value)

    def render_value(self, value, keypath=None):
        # keypath is ignored.
        # if it wasn't read as a string, ignore it
        if not isinstance(value, compat.basestring):
            return value
        # force the result of rendering into this python version's native
        # string type
        return compat.to_native_string(get_rendered(value, self.context))

    def _render_profile_data(self, value, keypath):
        result = self.render_value(value)
        if len(keypath) == 1 and keypath[-1] == 'port':
            try:
                result = int(result)
            except ValueError:
                # let the validator or connection handle this
                pass
        return result

    def _render_schema_source_data(self, value, keypath):
        # things to not render:
        # - descriptions
        if len(keypath) > 0 and keypath[-1] == 'description':
            return value

        return self.render_value(value)

    def render_project(self, as_parsed):
        """Render the parsed data, returning a new dict (or whatever was read).
        """
        try:
            return deep_map(self._render_project_entry, as_parsed)
        except RecursionException:
            raise DbtProjectError(
                'Cycle detected: Project input has a reference to itself',
                project=as_parsed
            )

    def render_profile_data(self, as_parsed):
        """Render the chosen profile entry, as it was parsed."""
        try:
            return deep_map(self._render_profile_data, as_parsed)
        except RecursionException:
            raise DbtProfileError(
                'Cycle detected: Profile input has a reference to itself',
                project=as_parsed
            )

    def render_schema_source(self, as_parsed):
        try:
            return deep_map(self._render_schema_source_data, as_parsed)
        except RecursionException:
            raise DbtProfileError(
                'Cycle detected: schema.yml input has a reference to itself',
                project=as_parsed
            )
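To make the keypath-skipping behaviour above concrete, here is a self-contained toy version of the same idea: walk a parsed dbt_project.yml dict, "render" every string leaf, but leave hook and model-level vars entries untouched. The mini deep_map and the fake render step below are stand-ins for dbt.utils.deep_map and Jinja rendering, not the real implementations:

def deep_map(func, value, keypath=()):
    # Simplified stand-in for dbt.utils.deep_map: apply func to every leaf,
    # passing the path of keys/indices taken to reach it.
    if isinstance(value, dict):
        return {k: deep_map(func, v, keypath + (k,)) for k, v in value.items()}
    if isinstance(value, list):
        return [deep_map(func, v, keypath + (i,)) for i, v in enumerate(value)]
    return func(value, keypath)


def is_hook_or_model_vars_path(keypath):
    if not keypath:
        return False
    first = keypath[0]
    if first in {'on-run-start', 'on-run-end'}:
        return True
    if first in {'seeds', 'models'}:
        return ('pre-hook' in keypath or 'post-hook' in keypath or
                'vars' in keypath)
    return False


def render_entry(value, keypath):
    if is_hook_or_model_vars_path(keypath):
        return value                                       # left raw
    if isinstance(value, str):
        return value.replace('{{ var("env") }}', 'prod')   # fake Jinja render
    return value


project = {
    'name': '{{ var("env") }}_project',
    'models': {'my_project': {'vars': {'env': '{{ var("env") }}'}}},
    'on-run-start': ["grant usage on schema {{ var('env') }} to reporter"],
}
print(deep_map(render_entry, project))
# 'name' is rendered; the model-level vars and the hook string are untouched.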
core/dbt/config/runtime.py (new file, 205 lines)
@@ -0,0 +1,205 @@
|
||||
from copy import deepcopy
|
||||
|
||||
from dbt.utils import parse_cli_vars
|
||||
from dbt.contracts.project import Configuration
|
||||
from dbt.exceptions import DbtProjectError
|
||||
from dbt.exceptions import ValidationException
|
||||
from dbt.adapters.factory import get_relation_class_by_name
|
||||
|
||||
from .profile import Profile
|
||||
from .project import Project
|
||||
|
||||
|
||||
_ARCHIVE_REMOVED_MESSAGE = '''
|
||||
The `archive` section in `dbt_project.yml` is no longer supported. Please use a
|
||||
`snapshot` block instead. For more information on snapshot blocks and a script
|
||||
to help migrate these archives, please consult the 0.14.0 migration guide:
|
||||
|
||||
https://docs.getdbt.com/v0.14/docs/upgrading-to-014
|
||||
'''.strip()
|
||||
|
||||
|
||||
class RuntimeConfig(Project, Profile):
|
||||
"""The runtime configuration, as constructed from its components. There's a
|
||||
lot because there is a lot of stuff!
|
||||
"""
|
||||
def __init__(self, project_name, version, project_root, source_paths,
|
||||
macro_paths, data_paths, test_paths, analysis_paths,
|
||||
docs_paths, target_path, snapshot_paths, clean_targets,
|
||||
log_path, modules_path, quoting, models, on_run_start,
|
||||
on_run_end, archive, seeds, dbt_version, profile_name,
|
||||
target_name, config, threads, credentials, packages, args):
|
||||
# 'vars'
|
||||
self.args = args
|
||||
self.cli_vars = parse_cli_vars(getattr(args, 'vars', '{}'))
|
||||
# 'project'
|
||||
Project.__init__(
|
||||
self,
|
||||
project_name=project_name,
|
||||
version=version,
|
||||
project_root=project_root,
|
||||
profile_name=profile_name,
|
||||
source_paths=source_paths,
|
||||
macro_paths=macro_paths,
|
||||
data_paths=data_paths,
|
||||
test_paths=test_paths,
|
||||
analysis_paths=analysis_paths,
|
||||
docs_paths=docs_paths,
|
||||
target_path=target_path,
|
||||
snapshot_paths=snapshot_paths,
|
||||
clean_targets=clean_targets,
|
||||
log_path=log_path,
|
||||
modules_path=modules_path,
|
||||
quoting=quoting,
|
||||
models=models,
|
||||
on_run_start=on_run_start,
|
||||
on_run_end=on_run_end,
|
||||
archive=archive,
|
||||
seeds=seeds,
|
||||
dbt_version=dbt_version,
|
||||
packages=packages
|
||||
)
|
||||
# 'profile'
|
||||
Profile.__init__(
|
||||
self,
|
||||
profile_name=profile_name,
|
||||
target_name=target_name,
|
||||
config=config,
|
||||
threads=threads,
|
||||
credentials=credentials
|
||||
)
|
||||
self.validate()
|
||||
|
||||
@classmethod
|
||||
def from_parts(cls, project, profile, args, allow_archive_configs=False):
|
||||
"""Instantiate a RuntimeConfig from its components.
|
||||
|
||||
:param profile Profile: A parsed dbt Profile.
|
||||
:param project Project: A parsed dbt Project.
|
||||
:param args argparse.Namespace: The parsed command-line arguments.
|
||||
:param allow_archive_configs bool: If True, ignore archive blocks in
|
||||
configs. This flag exists to enable archive migration.
|
||||
:returns RuntimeConfig: The new configuration.
|
||||
"""
|
||||
quoting = deepcopy(
|
||||
get_relation_class_by_name(profile.credentials.type)
|
||||
.DEFAULTS['quote_policy']
|
||||
)
|
||||
quoting.update(project.quoting)
|
||||
if project.archive and not allow_archive_configs:
|
||||
# if the user has an `archive` section, raise an error
|
||||
raise DbtProjectError(_ARCHIVE_REMOVED_MESSAGE)
|
||||
|
||||
return cls(
|
||||
project_name=project.project_name,
|
||||
version=project.version,
|
||||
project_root=project.project_root,
|
||||
source_paths=project.source_paths,
|
||||
macro_paths=project.macro_paths,
|
||||
data_paths=project.data_paths,
|
||||
test_paths=project.test_paths,
|
||||
analysis_paths=project.analysis_paths,
|
||||
docs_paths=project.docs_paths,
|
||||
target_path=project.target_path,
|
||||
snapshot_paths=project.snapshot_paths,
|
||||
clean_targets=project.clean_targets,
|
||||
log_path=project.log_path,
|
||||
modules_path=project.modules_path,
|
||||
quoting=quoting,
|
||||
models=project.models,
|
||||
on_run_start=project.on_run_start,
|
||||
on_run_end=project.on_run_end,
|
||||
archive=project.archive,
|
||||
seeds=project.seeds,
|
||||
dbt_version=project.dbt_version,
|
||||
packages=project.packages,
|
||||
profile_name=profile.profile_name,
|
||||
target_name=profile.target_name,
|
||||
config=profile.config,
|
||||
threads=profile.threads,
|
||||
credentials=profile.credentials,
|
||||
args=args
|
||||
)
|
||||
|
||||
def new_project(self, project_root):
|
||||
"""Given a new project root, read in its project dictionary, supply the
|
||||
existing project's profile info, and create a new project file.
|
||||
|
||||
:param project_root str: A filepath to a dbt project.
|
||||
:raises DbtProfileError: If the profile is invalid.
|
||||
:raises DbtProjectError: If project is missing or invalid.
|
||||
:returns RuntimeConfig: The new configuration.
|
||||
"""
|
||||
# copy profile
|
||||
profile = Profile(**self.to_profile_info())
|
||||
profile.validate()
|
||||
# load the new project and its packages. Don't pass cli variables.
|
||||
project = Project.from_project_root(project_root, {})
|
||||
|
||||
cfg = self.from_parts(
|
||||
project=project,
|
||||
profile=profile,
|
||||
args=deepcopy(self.args),
|
||||
)
|
||||
# force our quoting back onto the new project.
|
||||
cfg.quoting = deepcopy(self.quoting)
|
||||
return cfg
|
||||
|
||||
def serialize(self):
|
||||
"""Serialize the full configuration to a single dictionary. For any
|
||||
instance that has passed validate() (which happens in __init__), it
|
||||
matches the Configuration contract.
|
||||
|
||||
Note that args are not serialized.
|
||||
|
||||
:returns dict: The serialized configuration.
|
||||
"""
|
||||
result = self.to_project_config(with_packages=True)
|
||||
result.update(self.to_profile_info(serialize_credentials=True))
|
||||
result['cli_vars'] = deepcopy(self.cli_vars)
|
||||
return result
|
||||
|
||||
def __str__(self):
|
||||
return str(self.serialize())
|
||||
|
||||
def validate(self):
|
||||
"""Validate the configuration against its contract.
|
||||
|
||||
:raises DbtProjectError: If the configuration fails validation.
|
||||
"""
|
||||
try:
|
||||
Configuration(**self.serialize())
|
||||
except ValidationException as e:
|
||||
raise DbtProjectError(str(e))
|
||||
|
||||
if getattr(self.args, 'version_check', False):
|
||||
self.validate_version()
|
||||
|
||||
@classmethod
|
||||
def from_args(cls, args, allow_archive_configs=False):
|
||||
"""Given arguments, read in dbt_project.yml from the current directory,
|
||||
read in packages.yml if it exists, and use them to find the profile to
|
||||
load.
|
||||
|
||||
:param args argparse.Namespace: The arguments as parsed from the cli.
|
||||
:param allow_archive_configs bool: If True, ignore archive blocks in
|
||||
configs. This flag exists to enable archive migration.
|
||||
:raises DbtProjectError: If the project is invalid or missing.
|
||||
:raises DbtProfileError: If the profile is invalid or missing.
|
||||
:raises ValidationException: If the cli variables are invalid.
|
||||
"""
|
||||
# build the project and read in packages.yml
|
||||
project = Project.from_args(args)
|
||||
|
||||
# build the profile
|
||||
profile = Profile.from_args(
|
||||
args=args,
|
||||
project_profile_name=project.profile_name
|
||||
)
|
||||
|
||||
return cls.from_parts(
|
||||
project=project,
|
||||
profile=profile,
|
||||
args=args,
|
||||
allow_archive_configs=allow_archive_configs
|
||||
)
|
||||
@@ -1,5 +1,3 @@
|
||||
import copy
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
|
||||
@@ -7,16 +5,16 @@ from dbt.adapters.factory import get_adapter
|
||||
from dbt.compat import basestring
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.contracts.graph.parsed import ParsedMacro, ParsedNode
|
||||
from dbt.include.global_project import PACKAGES
|
||||
from dbt.include.global_project import PROJECT_NAME as GLOBAL_PROJECT_NAME
|
||||
|
||||
import dbt.clients.jinja
|
||||
import dbt.clients.agate_helper
|
||||
import dbt.flags
|
||||
import dbt.schema
|
||||
import dbt.tracking
|
||||
import dbt.writer
|
||||
import dbt.utils
|
||||
|
||||
import dbt.hooks
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
|
||||
|
||||
@@ -34,6 +32,11 @@ class RelationProxy(object):
|
||||
def __getattr__(self, key):
|
||||
return getattr(self.relation_type, key)
|
||||
|
||||
def create_from_source(self, *args, **kwargs):
|
||||
# bypass our create when creating from source so as not to mess up
|
||||
# the source quoting
|
||||
return self.relation_type.create_from_source(*args, **kwargs)
|
||||
|
||||
def create(self, *args, **kwargs):
|
||||
kwargs['quote_policy'] = dbt.utils.merge(
|
||||
self.quoting_config,
|
||||
@@ -42,41 +45,17 @@ class RelationProxy(object):
|
||||
return self.relation_type.create(*args, **kwargs)
|
||||
|
||||
|
||||
class DatabaseWrapper(object):
|
||||
class BaseDatabaseWrapper(object):
|
||||
"""
|
||||
Wrapper for runtime database interaction. Mostly a compatibility layer now.
|
||||
Wrapper for runtime database interaction. Applies the runtime quote policy
|
||||
via a relation proxy.
|
||||
"""
|
||||
def __init__(self, model, adapter):
|
||||
self.model = model
|
||||
def __init__(self, adapter):
|
||||
self.adapter = adapter
|
||||
self.Relation = RelationProxy(adapter)
|
||||
|
||||
self._wrapped = frozenset(
|
||||
self.adapter.config_functions
|
||||
)
|
||||
self._proxied = frozenset(self.adapter.raw_functions)
|
||||
|
||||
def wrap(self, name):
|
||||
func = getattr(self.adapter, name)
|
||||
|
||||
@functools.wraps(func)
|
||||
def wrapped(*args, **kwargs):
|
||||
kwargs['model_name'] = self.model.get('name')
|
||||
return func(*args, **kwargs)
|
||||
|
||||
return wrapped
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name in self._wrapped:
|
||||
return self.wrap(name)
|
||||
elif name in self._proxied:
|
||||
return getattr(self.adapter, name)
|
||||
else:
|
||||
raise AttributeError(
|
||||
"'{}' object has no attribute '{}'".format(
|
||||
self.__class__.__name__, name
|
||||
)
|
||||
)
|
||||
raise NotImplementedError('subclasses need to implement this')
|
||||
|
||||
@property
|
||||
def config(self):
|
||||
@@ -86,7 +65,37 @@ class DatabaseWrapper(object):
|
||||
return self.adapter.type()
|
||||
|
||||
def commit(self):
|
||||
return self.adapter.commit_if_has_connection(self.model.get('name'))
|
||||
return self.adapter.commit_if_has_connection()
|
||||
|
||||
|
||||
class BaseResolver(object):
|
||||
def __init__(self, db_wrapper, model, config, manifest):
|
||||
self.db_wrapper = db_wrapper
|
||||
self.model = model
|
||||
self.config = config
|
||||
self.manifest = manifest
|
||||
|
||||
@property
|
||||
def current_project(self):
|
||||
return self.config.project_name
|
||||
|
||||
@property
|
||||
def Relation(self):
|
||||
return self.db_wrapper.Relation
|
||||
|
||||
|
||||
def _add_macro_map(context, package_name, macro_map):
|
||||
"""Update an existing context in-place, adding the given macro map to the
|
||||
appropriate package namespace. Adapter packages get inserted into the
|
||||
global namespace.
|
||||
"""
|
||||
key = package_name
|
||||
if package_name in PACKAGES:
|
||||
key = GLOBAL_PROJECT_NAME
|
||||
if key not in context:
|
||||
context[key] = {}
|
||||
|
||||
context[key].update(macro_map)
|
||||
|
||||
|
||||
def _add_macros(context, model, manifest):
|
||||
@@ -101,15 +110,12 @@ def _add_macros(context, model, manifest):
|
||||
macro.name: macro.generator(context)
|
||||
}
|
||||
|
||||
if context.get(package_name) is None:
|
||||
context[package_name] = {}
|
||||
|
||||
context.get(package_name, {}) \
|
||||
.update(macro_map)
|
||||
# adapter packages are part of the global project space
|
||||
_add_macro_map(context, package_name, macro_map)
|
||||
|
||||
if package_name == model.package_name:
|
||||
macros_to_add['local'].append(macro_map)
|
||||
elif package_name == dbt.include.GLOBAL_PROJECT_NAME:
|
||||
elif package_name in PACKAGES:
|
||||
macros_to_add['global'].append(macro_map)
|
||||
|
||||
# Load global macros before local macros -- local takes precedence
|
||||
@@ -189,6 +195,13 @@ def _load_result(sql_results):
|
||||
return call
|
||||
|
||||
|
||||
def _debug_here():
|
||||
import sys
|
||||
import ipdb
|
||||
frame = sys._getframe(3)
|
||||
ipdb.set_trace(frame)
|
||||
|
||||
|
||||
def _add_sql_handlers(context):
|
||||
sql_results = {}
|
||||
return dbt.utils.merge(context, {
|
||||
@@ -209,8 +222,7 @@ def log(msg, info=False):
|
||||
class Var(object):
|
||||
UndefinedVarError = "Required var '{}' not found in config:\nVars "\
|
||||
"supplied to {} = {}"
|
||||
NoneVarError = "Supplied var '{}' is undefined in config:\nVars supplied "\
|
||||
"to {} = {}"
|
||||
_VAR_NOTSET = object()
|
||||
|
||||
def __init__(self, model, context, overrides):
|
||||
self.model = model
|
||||
@@ -243,43 +255,33 @@ class Var(object):
|
||||
def pretty_dict(self, data):
|
||||
return json.dumps(data, sort_keys=True, indent=4)
|
||||
|
||||
def get_missing_var(self, var_name):
|
||||
pretty_vars = self.pretty_dict(self.local_vars)
|
||||
msg = self.UndefinedVarError.format(
|
||||
var_name, self.model_name, pretty_vars
|
||||
)
|
||||
dbt.exceptions.raise_compiler_error(msg, self.model)
|
||||
|
||||
def assert_var_defined(self, var_name, default):
|
||||
if var_name not in self.local_vars and default is None:
|
||||
pretty_vars = self.pretty_dict(self.local_vars)
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
self.UndefinedVarError.format(
|
||||
var_name, self.model_name, pretty_vars
|
||||
),
|
||||
self.model
|
||||
)
|
||||
if var_name not in self.local_vars and default is self._VAR_NOTSET:
|
||||
return self.get_missing_var(var_name)
|
||||
|
||||
def assert_var_not_none(self, var_name):
|
||||
def get_rendered_var(self, var_name):
|
||||
raw = self.local_vars[var_name]
|
||||
if raw is None:
|
||||
pretty_vars = self.pretty_dict(self.local_vars)
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
self.NoneVarError.format(
|
||||
var_name, self.model_name, pretty_vars
|
||||
),
|
||||
self.model
|
||||
)
|
||||
|
||||
def __call__(self, var_name, default=None):
|
||||
self.assert_var_defined(var_name, default)
|
||||
|
||||
if var_name not in self.local_vars:
|
||||
return default
|
||||
|
||||
self.assert_var_not_none(var_name)
|
||||
|
||||
raw = self.local_vars[var_name]
|
||||
|
||||
# if bool/int/float/etc are passed in, don't compile anything
|
||||
if not isinstance(raw, basestring):
|
||||
return raw
|
||||
|
||||
return dbt.clients.jinja.get_rendered(raw, self.context)
|
||||
|
||||
def __call__(self, var_name, default=_VAR_NOTSET):
|
||||
if var_name in self.local_vars:
|
||||
return self.get_rendered_var(var_name)
|
||||
elif default is not self._VAR_NOTSET:
|
||||
return default
|
||||
else:
|
||||
return self.get_missing_var(var_name)
|
||||
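The rewritten Var.__call__ above replaces the old "default is None means required" convention with a private sentinel, so that var('foo', default=None) becomes a legal way to say "None is an acceptable value". A stripped-down illustration of the sentinel pattern (names invented for the example):

_NOTSET = object()  # unique sentinel; can never collide with a user value


def lookup(store, key, default=_NOTSET):
    if key in store:
        return store[key]
    if default is not _NOTSET:
        return default          # an explicit default, even None, is honored
    raise KeyError("required var {!r} not supplied".format(key))


vars_ = {"target_schema": "analytics"}
print(lookup(vars_, "target_schema"))      # analytics
print(lookup(vars_, "start_date", None))   # None, not an error
lookup(vars_, "start_date")                # raises KeyError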
|
||||
|
||||
def write(node, target_path, subdirectory):
|
||||
def fn(payload):
|
||||
@@ -300,14 +302,14 @@ def render(context, node):
|
||||
def fromjson(string, default=None):
|
||||
try:
|
||||
return json.loads(string)
|
||||
except ValueError as e:
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def tojson(value, default=None):
|
||||
try:
|
||||
return json.dumps(value)
|
||||
except ValueError as e:
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
@@ -315,7 +317,7 @@ def try_or_compiler_error(model):
|
||||
def impl(message_if_exception, func, *args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
dbt.exceptions.raise_compiler_error(message_if_exception, model)
|
||||
return impl
|
||||
|
||||
@@ -328,8 +330,46 @@ def get_this_relation(db_wrapper, config, model):
|
||||
return db_wrapper.Relation.create_from_node(config, model)
|
||||
|
||||
|
||||
def get_pytz_module_context():
|
||||
context_exports = pytz.__all__
|
||||
|
||||
return {
|
||||
name: getattr(pytz, name) for name in context_exports
|
||||
}
|
||||
|
||||
|
||||
def get_datetime_module_context():
|
||||
context_exports = [
|
||||
'date',
|
||||
'datetime',
|
||||
'time',
|
||||
'timedelta',
|
||||
'tzinfo'
|
||||
]
|
||||
|
||||
return {
|
||||
name: getattr(datetime, name) for name in context_exports
|
||||
}
|
||||
|
||||
|
||||
def get_context_modules():
|
||||
return {
|
||||
'pytz': get_pytz_module_context(),
|
||||
'datetime': get_datetime_module_context(),
|
||||
}
|
||||
|
||||
|
||||
def generate_config_context(cli_vars):
|
||||
context = {
|
||||
'env_var': env_var,
|
||||
'modules': get_context_modules(),
|
||||
}
|
||||
context['var'] = Var(None, context, cli_vars)
|
||||
return _add_tracking(context)
|
||||
|
||||
|
||||
def generate_base(model, model_dict, config, manifest, source_config,
|
||||
provider):
|
||||
provider, adapter=None):
|
||||
"""Generate the common aspects of the config dict."""
|
||||
if provider is None:
|
||||
raise dbt.exceptions.InternalException(
|
||||
@@ -338,19 +378,19 @@ def generate_base(model, model_dict, config, manifest, source_config,
|
||||
target_name = config.target_name
|
||||
target = config.to_profile_info()
|
||||
del target['credentials']
|
||||
target.update(config.credentials.serialize())
|
||||
target.update(config.credentials.serialize(with_aliases=True))
|
||||
target['type'] = config.credentials.type
|
||||
target.pop('pass', None)
|
||||
target['name'] = target_name
|
||||
|
||||
adapter = get_adapter(config)
|
||||
|
||||
context = {'env': target}
|
||||
schema = config.credentials.schema
|
||||
|
||||
pre_hooks = None
|
||||
post_hooks = None
|
||||
|
||||
db_wrapper = DatabaseWrapper(model_dict, adapter)
|
||||
db_wrapper = provider.DatabaseWrapper(adapter)
|
||||
|
||||
context = dbt.utils.merge(context, {
|
||||
"adapter": db_wrapper,
|
||||
@@ -360,52 +400,36 @@ def generate_base(model, model_dict, config, manifest, source_config,
|
||||
},
|
||||
"column": adapter.Column,
|
||||
"config": provider.Config(model_dict, source_config),
|
||||
"database": config.credentials.database,
|
||||
"env_var": env_var,
|
||||
"exceptions": dbt.exceptions,
|
||||
"exceptions": dbt.exceptions.wrapped_exports(model),
|
||||
"execute": provider.execute,
|
||||
"flags": dbt.flags,
|
||||
# TODO: Do we have to leave this in?
|
||||
"graph": manifest.to_flat_graph(),
|
||||
"graph": manifest.flat_graph,
|
||||
"log": log,
|
||||
"model": model_dict,
|
||||
"modules": {
|
||||
"pytz": pytz,
|
||||
"datetime": datetime
|
||||
},
|
||||
"modules": get_context_modules(),
|
||||
"post_hooks": post_hooks,
|
||||
"pre_hooks": pre_hooks,
|
||||
"ref": provider.ref(db_wrapper, model, config, manifest),
|
||||
"return": _return,
|
||||
"schema": schema,
|
||||
"schema": config.credentials.schema,
|
||||
"sql": None,
|
||||
"sql_now": adapter.date_function(),
|
||||
"source": provider.source(db_wrapper, model, config, manifest),
|
||||
"fromjson": fromjson,
|
||||
"tojson": tojson,
|
||||
"target": target,
|
||||
"try_or_compiler_error": try_or_compiler_error(model)
|
||||
})
|
||||
if os.environ.get('DBT_MACRO_DEBUGGING'):
|
||||
context['debug'] = _debug_here
|
||||
|
||||
# Operations do not represent database relations, so there should be no
|
||||
# 'this' variable in the context for operations. The Operation branch
|
||||
# below should be removed in a future release. The fake relation below
|
||||
# mirrors the historical implementation, without causing errors around
|
||||
# the missing 'alias' attribute for operations
|
||||
#
|
||||
# https://github.com/fishtown-analytics/dbt/issues/878
|
||||
if model.resource_type == NodeType.Operation:
|
||||
this = db_wrapper.adapter.Relation.create(
|
||||
schema=config.credentials.schema,
|
||||
identifier=model.name
|
||||
)
|
||||
else:
|
||||
this = get_this_relation(db_wrapper, config, model_dict)
|
||||
|
||||
context["this"] = this
|
||||
return context
|
||||
|
||||
|
||||
def modify_generated_context(context, model, model_dict, config,
|
||||
manifest):
|
||||
def modify_generated_context(context, model, model_dict, config, manifest,
|
||||
provider):
|
||||
cli_var_overrides = config.cli_vars
|
||||
|
||||
context = _add_tracking(context)
|
||||
@@ -418,38 +442,48 @@ def modify_generated_context(context, model, model_dict, config,
|
||||
|
||||
context["write"] = write(model_dict, config.target_path, 'run')
|
||||
context["render"] = render(context, model_dict)
|
||||
context["var"] = Var(model, context=context, overrides=cli_var_overrides)
|
||||
context["var"] = provider.Var(model, context=context,
|
||||
overrides=cli_var_overrides)
|
||||
context['context'] = context
|
||||
|
||||
return context
|
||||
|
||||
|
||||
def generate_operation_macro(model, config, manifest, provider):
|
||||
"""This is an ugly hack to support the fact that the `docs generate`
|
||||
operation ends up in here, and macros are not nodes.
|
||||
def generate_execute_macro(model, config, manifest, provider):
|
||||
"""Internally, macros can be executed like nodes, with some restrictions:
|
||||
|
||||
- they don't have all values available that nodes do:
|
||||
- 'this', 'pre_hooks', 'post_hooks', and 'sql' are missing
|
||||
- 'schema' does not use any 'model' information
|
||||
- they can't be configured with config() directives
|
||||
"""
|
||||
model_dict = model.serialize()
|
||||
context = generate_base(model, model_dict, config, manifest,
|
||||
None, provider)
|
||||
context = generate_base(model, model_dict, config, manifest, None,
|
||||
provider)
|
||||
|
||||
return modify_generated_context(context, model, model_dict, config,
|
||||
manifest)
|
||||
manifest, provider)
|
||||
|
||||
|
||||
def generate_model(model, config, manifest, source_config, provider):
|
||||
model_dict = model.to_dict()
|
||||
context = generate_base(model, model_dict, config, manifest,
|
||||
source_config, provider)
|
||||
# overwrite schema if we have it, and hooks + sql
|
||||
# operations (hooks) don't get a 'this'
|
||||
if model.resource_type != NodeType.Operation:
|
||||
this = get_this_relation(context['adapter'], config, model_dict)
|
||||
context['this'] = this
|
||||
# overwrite schema/database if we have them, and hooks + sql
|
||||
context.update({
|
||||
'schema': model.get('schema', context['schema']),
|
||||
'database': model.get('database', context['database']),
|
||||
'pre_hooks': model.config.get('pre-hook'),
|
||||
'post_hooks': model.config.get('post-hook'),
|
||||
'sql': model.get('injected_sql'),
|
||||
})
|
||||
|
||||
return modify_generated_context(context, model, model_dict, config,
|
||||
manifest)
|
||||
manifest, provider)
|
||||
|
||||
|
||||
def generate(model, config, manifest, source_config=None, provider=None):
|
||||
@@ -459,8 +493,4 @@ def generate(model, config, manifest, source_config=None, provider=None):
|
||||
or
|
||||
dbt.context.runtime.generate
|
||||
"""
|
||||
if isinstance(model, ParsedMacro):
|
||||
return generate_operation_macro(model, config, manifest, provider)
|
||||
else:
|
||||
return generate_model(model, config, manifest, source_config,
|
||||
provider)
|
||||
return generate_model(model, config, manifest, source_config, provider)
|
||||
core/dbt/context/operation.py (new file, 29 lines)
@@ -0,0 +1,29 @@
import dbt.context.common
from dbt.context import runtime
from dbt.exceptions import raise_compiler_error


class RefResolver(runtime.BaseRefResolver):
    def __call__(self, *args):
        # When you call ref(), this is what happens at operation runtime
        target_model, name = self.resolve(args)
        return self.create_relation(target_model, name)

    def create_ephemeral_relation(self, target_model, name):
        # In operations, we can't ref() ephemeral nodes, because ParsedMacros
        # do not support set_cte
        raise_compiler_error(
            'Operations can not ref() ephemeral nodes, but {} is ephemeral'
            .format(target_model.name),
            self.model
        )


class Provider(runtime.Provider):
    ref = RefResolver


def generate(model, runtime_config, manifest):
    return dbt.context.common.generate_execute_macro(
        model, runtime_config, manifest, Provider()
    )
core/dbt/context/parser.py (new file, 146 lines)
@@ -0,0 +1,146 @@
|
||||
import dbt.exceptions
|
||||
|
||||
import dbt.context.common
|
||||
from dbt.adapters.factory import get_adapter
|
||||
|
||||
|
||||
def docs(unparsed, docrefs, column_name=None):
|
||||
|
||||
def do_docs(*args):
|
||||
if len(args) != 1 and len(args) != 2:
|
||||
dbt.exceptions.doc_invalid_args(unparsed, args)
|
||||
doc_package_name = ''
|
||||
doc_name = args[0]
|
||||
if len(args) == 2:
|
||||
doc_package_name = args[1]
|
||||
|
||||
docref = {
|
||||
'documentation_package': doc_package_name,
|
||||
'documentation_name': doc_name,
|
||||
}
|
||||
if column_name is not None:
|
||||
docref['column_name'] = column_name
|
||||
|
||||
docrefs.append(docref)
|
||||
|
||||
# IDK
|
||||
return True
|
||||
|
||||
return do_docs
|
||||
|
||||
|
||||
class Config(object):
|
||||
def __init__(self, model, source_config):
|
||||
self.model = model
|
||||
self.source_config = source_config
|
||||
|
||||
def _transform_config(self, config):
|
||||
for oldkey in ('pre_hook', 'post_hook'):
|
||||
if oldkey in config:
|
||||
newkey = oldkey.replace('_', '-')
|
||||
if newkey in config:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
'Invalid config, has conflicting keys "{}" and "{}"'
|
||||
.format(oldkey, newkey),
|
||||
self.model
|
||||
)
|
||||
config[newkey] = config.pop(oldkey)
|
||||
return config
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
if len(args) == 1 and len(kwargs) == 0:
|
||||
opts = args[0]
|
||||
elif len(args) == 0 and len(kwargs) > 0:
|
||||
opts = kwargs
|
||||
else:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
"Invalid inline model config",
|
||||
self.model)
|
||||
|
||||
opts = self._transform_config(opts)
|
||||
|
||||
self.source_config.update_in_model_config(opts)
|
||||
return ''
|
||||
|
||||
def set(self, name, value):
|
||||
return self.__call__({name: value})
|
||||
|
||||
def require(self, name, validator=None):
|
||||
return ''
|
||||
|
||||
def get(self, name, validator=None, default=None):
|
||||
return ''
|
||||
|
||||
|
||||
class DatabaseWrapper(dbt.context.common.BaseDatabaseWrapper):
|
||||
"""The parser subclass of the database wrapper applies any explicit
|
||||
parse-time overrides.
|
||||
"""
|
||||
def __getattr__(self, name):
|
||||
override = (name in self.adapter._available_ and
|
||||
name in self.adapter._parse_replacements_)
|
||||
|
||||
if override:
|
||||
return self.adapter._parse_replacements_[name]
|
||||
elif name in self.adapter._available_:
|
||||
return getattr(self.adapter, name)
|
||||
else:
|
||||
raise AttributeError(
|
||||
"'{}' object has no attribute '{}'".format(
|
||||
self.__class__.__name__, name
|
||||
)
|
||||
)
|
||||
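Both database wrappers implement attribute lookup via __getattr__ against sets the adapter publishes (_available_, plus _parse_replacements_ for the parser). The pattern in isolation, with a dummy adapter standing in for a real dbt adapter (everything below is illustrative, not dbt's API):

class DummyAdapter(object):
    _available_ = frozenset({'get_columns_in_relation', 'quote'})
    _parse_replacements_ = {'get_columns_in_relation': lambda *a, **k: []}

    def quote(self, identifier):
        return '"{}"'.format(identifier)

    def get_columns_in_relation(self, relation):
        raise RuntimeError("would hit the database")


class ParseTimeWrapper(object):
    """Parse-time wrapper: swap database-touching calls for cheap stubs."""
    def __init__(self, adapter):
        self.adapter = adapter

    def __getattr__(self, name):
        if (name in self.adapter._available_ and
                name in self.adapter._parse_replacements_):
            return self.adapter._parse_replacements_[name]
        if name in self.adapter._available_:
            return getattr(self.adapter, name)
        raise AttributeError(name)


wrapper = ParseTimeWrapper(DummyAdapter())
print(wrapper.quote('orders'))                    # "orders"
print(wrapper.get_columns_in_relation('orders'))  # [] -- no database hit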
|
||||
|
||||
class Var(dbt.context.common.Var):
|
||||
def get_missing_var(self, var_name):
|
||||
# in the parser, just always return None.
|
||||
return None
|
||||
|
||||
|
||||
class RefResolver(dbt.context.common.BaseResolver):
|
||||
def __call__(self, *args):
|
||||
# When you call ref(), this is what happens at parse time
|
||||
if len(args) == 1 or len(args) == 2:
|
||||
self.model.refs.append(list(args))
|
||||
|
||||
else:
|
||||
dbt.exceptions.ref_invalid_args(self.model, args)
|
||||
|
||||
return self.Relation.create_from_node(self.config, self.model)
|
||||
|
||||
|
||||
class SourceResolver(dbt.context.common.BaseResolver):
|
||||
def __call__(self, source_name, table_name):
|
||||
# When you call source(), this is what happens at parse time
|
||||
self.model.sources.append([source_name, table_name])
|
||||
return self.Relation.create_from_node(self.config, self.model)
|
||||
|
||||
|
||||
class Provider(object):
|
||||
execute = False
|
||||
Config = Config
|
||||
DatabaseWrapper = DatabaseWrapper
|
||||
Var = Var
|
||||
ref = RefResolver
|
||||
source = SourceResolver
|
||||
|
||||
|
||||
def generate(model, runtime_config, manifest, source_config):
|
||||
# during parsing, we don't have a connection, but we might need one, so we
|
||||
# have to acquire it.
|
||||
# In the future, it would be nice to lazily open the connection, as in some
|
||||
# projects it would be possible to parse without connecting to the db
|
||||
with get_adapter(runtime_config).connection_named(model.get('name')):
|
||||
return dbt.context.common.generate(
|
||||
model, runtime_config, manifest, source_config, Provider()
|
||||
)
|
||||
|
||||
|
||||
def generate_macro(model, runtime_config, manifest):
|
||||
# parser.generate_macro is called by the get_${attr}_func family of Parser
|
||||
# methods, which preparse and cache the generate_${attr}_name family of
|
||||
# macros for use during parsing
|
||||
return dbt.context.common.generate_execute_macro(
|
||||
model, runtime_config, manifest, Provider()
|
||||
)
|
||||
core/dbt/context/runtime.py (new file, 150 lines)
@@ -0,0 +1,150 @@
|
||||
from dbt.utils import get_materialization, add_ephemeral_model_prefix
|
||||
|
||||
import dbt.clients.jinja
|
||||
import dbt.context.common
|
||||
import dbt.flags
|
||||
from dbt.parser import ParserUtils
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
|
||||
|
||||
class BaseRefResolver(dbt.context.common.BaseResolver):
|
||||
def resolve(self, args):
|
||||
name = None
|
||||
package = None
|
||||
|
||||
if len(args) == 1:
|
||||
name = args[0]
|
||||
elif len(args) == 2:
|
||||
package, name = args
|
||||
else:
|
||||
dbt.exceptions.ref_invalid_args(self.model, args)
|
||||
|
||||
target_model = ParserUtils.resolve_ref(
|
||||
self.manifest,
|
||||
name,
|
||||
package,
|
||||
self.current_project,
|
||||
self.model.package_name)
|
||||
|
||||
if target_model is None or target_model is ParserUtils.DISABLED:
|
||||
dbt.exceptions.ref_target_not_found(
|
||||
self.model,
|
||||
name,
|
||||
package)
|
||||
return target_model, name
|
||||
|
||||
def create_ephemeral_relation(self, target_model, name):
|
||||
self.model.set_cte(target_model.unique_id, None)
|
||||
return self.Relation.create(
|
||||
type=self.Relation.CTE,
|
||||
identifier=add_ephemeral_model_prefix(name)
|
||||
).quote(identifier=False)
|
||||
|
||||
def create_relation(self, target_model, name):
|
||||
if get_materialization(target_model) == 'ephemeral':
|
||||
return self.create_ephemeral_relation(target_model, name)
|
||||
else:
|
||||
return self.Relation.create_from_node(self.config, target_model)
|
||||
|
||||
|
||||
class RefResolver(BaseRefResolver):
|
||||
def validate(self, resolved, args):
|
||||
if resolved.unique_id not in self.model.depends_on.get('nodes'):
|
||||
dbt.exceptions.ref_bad_context(self.model, args)
|
||||
|
||||
def __call__(self, *args):
|
||||
# When you call ref(), this is what happens at runtime
|
||||
target_model, name = self.resolve(args)
|
||||
self.validate(target_model, args)
|
||||
return self.create_relation(target_model, name)
|
||||
|
||||
|
||||
class SourceResolver(dbt.context.common.BaseResolver):
|
||||
def resolve(self, source_name, table_name):
|
||||
target_source = ParserUtils.resolve_source(
|
||||
self.manifest,
|
||||
source_name,
|
||||
table_name,
|
||||
self.current_project,
|
||||
self.model.package_name
|
||||
)
|
||||
|
||||
if target_source is None:
|
||||
dbt.exceptions.source_target_not_found(
|
||||
self.model,
|
||||
source_name,
|
||||
table_name)
|
||||
return target_source
|
||||
|
||||
def __call__(self, source_name, table_name):
|
||||
"""When you call source(), this is what happens at runtime"""
|
||||
target_source = self.resolve(source_name, table_name)
|
||||
return self.Relation.create_from_source(target_source)
|
||||
|
||||
|
||||
class Config:
|
||||
def __init__(self, model, source_config=None):
|
||||
self.model = model
|
||||
# we never use or get a source config, only the parser cares
|
||||
|
||||
def __call__(*args, **kwargs):
|
||||
return ''
|
||||
|
||||
def set(self, name, value):
|
||||
return self.__call__({name: value})
|
||||
|
||||
def _validate(self, validator, value):
|
||||
validator(value)
|
||||
|
||||
def require(self, name, validator=None):
|
||||
if name not in self.model['config']:
|
||||
dbt.exceptions.missing_config(self.model, name)
|
||||
|
||||
to_return = self.model['config'][name]
|
||||
|
||||
if validator is not None:
|
||||
self._validate(validator, to_return)
|
||||
|
||||
return to_return
|
||||
|
||||
def get(self, name, validator=None, default=None):
|
||||
to_return = self.model['config'].get(name, default)
|
||||
|
||||
if validator is not None and default is not None:
|
||||
self._validate(validator, to_return)
|
||||
|
||||
return to_return
|
||||
|
||||
|
||||
class DatabaseWrapper(dbt.context.common.BaseDatabaseWrapper):
|
||||
"""The runtime database wrapper exposes everything the adapter marks
|
||||
available.
|
||||
"""
|
||||
def __getattr__(self, name):
|
||||
if name in self.adapter._available_:
|
||||
return getattr(self.adapter, name)
|
||||
else:
|
||||
raise AttributeError(
|
||||
"'{}' object has no attribute '{}'".format(
|
||||
self.__class__.__name__, name
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class Var(dbt.context.common.Var):
|
||||
pass
|
||||
|
||||
|
||||
class Provider(object):
|
||||
execute = True
|
||||
Config = Config
|
||||
DatabaseWrapper = DatabaseWrapper
|
||||
Var = Var
|
||||
ref = RefResolver
|
||||
source = SourceResolver
|
||||
|
||||
|
||||
def generate(model, runtime_config, manifest):
|
||||
return dbt.context.common.generate(
|
||||
model, runtime_config, manifest, None, Provider())
|
||||
core/dbt/contracts/connection.py (new file, 71 lines)
@@ -0,0 +1,71 @@
|
||||
from dbt.api.object import APIObject
|
||||
from dbt.contracts.common import named_property
|
||||
|
||||
|
||||
CONNECTION_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'type': {
|
||||
'type': 'string',
|
||||
# valid python identifiers only
|
||||
'pattern': r'^[A-Za-z_][A-Za-z0-9_]+$',
|
||||
},
|
||||
'name': {
|
||||
'type': ['null', 'string'],
|
||||
},
|
||||
'state': {
|
||||
'enum': ['init', 'open', 'closed', 'fail'],
|
||||
},
|
||||
'transaction_open': {
|
||||
'type': 'boolean',
|
||||
},
|
||||
# we can't serialize this so we can't require it as part of the
|
||||
# contract.
|
||||
# 'handle': {
|
||||
# 'type': ['null', 'object'],
|
||||
# },
|
||||
# credentials are validated separately by the adapter packages
|
||||
'credentials': {
|
||||
'description': (
|
||||
'The credentials object here should match the connection type.'
|
||||
),
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
}
|
||||
},
|
||||
'required': [
|
||||
'type', 'name', 'state', 'transaction_open', 'credentials'
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class Connection(APIObject):
|
||||
SCHEMA = CONNECTION_CONTRACT
|
||||
|
||||
def __init__(self, credentials, *args, **kwargs):
|
||||
# we can't serialize handles
|
||||
self._handle = kwargs.pop('handle')
|
||||
super(Connection, self).__init__(credentials=credentials.serialize(),
|
||||
*args, **kwargs)
|
||||
# this will validate itself in its own __init__.
|
||||
self._credentials = credentials
|
||||
|
||||
@property
|
||||
def credentials(self):
|
||||
return self._credentials
|
||||
|
||||
@property
|
||||
def handle(self):
|
||||
return self._handle
|
||||
|
||||
@handle.setter
|
||||
def handle(self, value):
|
||||
self._handle = value
|
||||
|
||||
name = named_property('name', 'The name of this connection')
|
||||
state = named_property('state', 'The state of the connection')
|
||||
transaction_open = named_property(
|
||||
'transaction_open',
|
||||
'True if there is an open transaction, False otherwise.'
|
||||
)
|
||||
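Connection pops the handle out of kwargs before validation because a live database handle cannot be expressed in a JSON contract. The same keep-it-off-the-record trick, reduced to its essentials (class and field names here are illustrative, not dbt's):

class Record(object):
    """Validates/serializes only plain data; the handle rides along privately."""
    FIELDS = ('name', 'state')

    def __init__(self, **kwargs):
        self._handle = kwargs.pop('handle', None)   # never serialized
        unknown = set(kwargs) - set(self.FIELDS)
        if unknown:
            raise ValueError('unexpected fields: {}'.format(sorted(unknown)))
        self._data = kwargs

    @property
    def handle(self):
        return self._handle

    @handle.setter
    def handle(self, value):
        self._handle = value

    def serialize(self):
        return dict(self._data)


conn = Record(name='snowflake', state='open', handle=object())
print(conn.serialize())   # {'name': 'snowflake', 'state': 'open'} -- no handle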
@@ -1,7 +1,4 @@
|
||||
from copy import copy, deepcopy
|
||||
|
||||
from dbt.api import APIObject
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.utils import deep_merge
|
||||
from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \
|
||||
PARSED_MACRO_CONTRACT, ParsedNode
|
||||
@@ -76,6 +73,7 @@ COMPILED_NODE_CONTRACT = deep_merge(
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
COMPILED_NODES_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -87,8 +85,10 @@ COMPILED_NODES_CONTRACT = {
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
COMPILED_MACRO_CONTRACT = PARSED_MACRO_CONTRACT
|
||||
|
||||
|
||||
COMPILED_MACROS_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -100,6 +100,7 @@ COMPILED_MACROS_CONTRACT = {
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
COMPILED_GRAPH_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -1,18 +1,23 @@
|
||||
from dbt.api import APIObject
|
||||
from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT
|
||||
from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \
|
||||
PARSED_MACRO_CONTRACT, PARSED_DOCUMENTATION_CONTRACT, ParsedNode
|
||||
PARSED_MACRO_CONTRACT, PARSED_DOCUMENTATION_CONTRACT, \
|
||||
PARSED_SOURCE_DEFINITION_CONTRACT
|
||||
from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT, CompiledNode
|
||||
from dbt.exceptions import ValidationException
|
||||
from dbt.exceptions import raise_duplicate_resource_name
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt import tracking
|
||||
import dbt.utils
|
||||
|
||||
# We allow either parsed or compiled nodes, as some 'compile()' calls in the
|
||||
# runner actually just return the original parsed node they were given.
|
||||
# We allow either parsed or compiled nodes, or parsed sources, as some
|
||||
# 'compile()' calls in the runner actually just return the original parsed
|
||||
# node they were given.
|
||||
COMPILE_RESULT_NODE_CONTRACT = {
|
||||
'anyOf': [PARSED_NODE_CONTRACT, COMPILED_NODE_CONTRACT]
|
||||
'anyOf': [
|
||||
PARSED_NODE_CONTRACT,
|
||||
COMPILED_NODE_CONTRACT,
|
||||
PARSED_SOURCE_DEFINITION_CONTRACT,
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -81,14 +86,8 @@ PARSED_MANIFEST_CONTRACT = {
|
||||
'docs': PARSED_DOCUMENTATIONS_CONTRACT,
|
||||
'disabled': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
},
|
||||
'description': 'A disabled node FQN',
|
||||
},
|
||||
'description': 'An array of disabled node FQNs',
|
||||
'items': PARSED_NODE_CONTRACT,
|
||||
'description': 'An array of disabled nodes',
|
||||
},
|
||||
'generated_at': {
|
||||
'type': 'string',
|
||||
@@ -186,6 +185,7 @@ class Manifest(APIObject):
|
||||
self.generated_at = generated_at
|
||||
self.metadata = metadata
|
||||
self.disabled = disabled
|
||||
self.flat_graph = None
|
||||
super(Manifest, self).__init__()
|
||||
|
||||
@staticmethod
|
||||
@@ -221,9 +221,25 @@ class Manifest(APIObject):
|
||||
'child_map': forward_edges,
|
||||
'generated_at': self.generated_at,
|
||||
'metadata': self.metadata,
|
||||
'disabled': self.disabled,
|
||||
'disabled': [v.serialize() for v in self.disabled],
|
||||
}
|
||||
|
||||
def build_flat_graph(self):
|
||||
"""This attribute is used in context.common by each node, so we want to
|
||||
only build it once and avoid any concurrency issues around it.
|
||||
Make sure you don't call this until you're done with building your
|
||||
manifest!
|
||||
"""
|
||||
self.flat_graph = {
|
||||
'nodes': {
|
||||
k: v.serialize() for k, v in self.nodes.items()
|
||||
},
|
||||
}
|
||||
|
||||
def find_disabled_by_name(self, name, package=None):
|
||||
return dbt.utils.find_in_list_by_name(self.disabled, name, package,
|
||||
NodeType.refable())
|
||||
|
||||
def _find_by_name(self, name, package, subgraph, nodetype):
|
||||
"""
|
||||
|
||||
@@ -258,13 +274,6 @@ class Manifest(APIObject):
|
||||
return doc
|
||||
return None
|
||||
|
||||
def find_operation_by_name(self, name, package):
|
||||
"""Find a macro in the graph by its name and package name, or None for
|
||||
any package.
|
||||
"""
|
||||
return self._find_by_name(name, package, 'macros',
|
||||
[NodeType.Operation])
|
||||
|
||||
def find_macro_by_name(self, name, package):
|
||||
"""Find a macro in the graph by its name and package name, or None for
|
||||
any package.
|
||||
@@ -277,6 +286,13 @@ class Manifest(APIObject):
|
||||
"""
|
||||
return self._find_by_name(name, package, 'nodes', NodeType.refable())
|
||||
|
||||
def find_source_by_name(self, source_name, table_name, package):
|
||||
"""Find any valid target for "source()" in the graph by its name and
|
||||
package name, or None for any package.
|
||||
"""
|
||||
name = '{}.{}'.format(source_name, table_name)
|
||||
return self._find_by_name(name, package, 'nodes', [NodeType.Source])
|
||||
|
||||
def get_materialization_macro(self, materialization_name,
|
||||
adapter_type=None):
|
||||
macro_name = dbt.utils.get_materialization_macro_name(
|
||||
@@ -302,6 +318,8 @@ class Manifest(APIObject):
|
||||
def get_resource_fqns(self):
|
||||
resource_fqns = {}
|
||||
for unique_id, node in self.nodes.items():
|
||||
if node.resource_type == NodeType.Source:
|
||||
continue # sources have no FQNs and can't be configured
|
||||
resource_type_plural = node.resource_type + 's'
|
||||
if resource_type_plural not in resource_fqns:
|
||||
resource_fqns[resource_type_plural] = set()
|
||||
@@ -323,24 +341,23 @@ class Manifest(APIObject):
|
||||
return to_return
|
||||
|
||||
def _model_matches_schema_and_table(self, schema, table, model):
|
||||
if model.resource_type == NodeType.Source:
|
||||
return (model.schema.lower() == schema.lower() and
|
||||
model.identifier.lower() == table.lower())
|
||||
return (model.schema.lower() == schema.lower() and
|
||||
model.alias.lower() == table.lower())
|
||||
|
||||
def get_unique_id_for_schema_and_table(self, schema, table):
|
||||
def get_unique_ids_for_schema_and_table(self, schema, table):
|
||||
"""
|
||||
Given a schema and table, find a matching model, and return
|
||||
the unique_id for that model. If more than one matching
|
||||
model is found, raise an exception.
|
||||
Given a schema and table, find matching models, and return
|
||||
their unique_ids. A schema and table may have more than one
|
||||
match if the relation matches both a source and a seed, for instance.
|
||||
"""
|
||||
def predicate(model):
|
||||
return self._model_matches_schema_and_table(schema, table, model)
|
||||
|
||||
matching = list(self._filter_subgraph(self.nodes, predicate))
|
||||
|
||||
if not matching:
|
||||
return None
|
||||
|
||||
return matching[0].get('unique_id')
|
||||
return [match.get('unique_id') for match in matching]
|
||||
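The relation-to-node lookup above now returns every match instead of the first one. A compact standalone version of the matching rule it describes (sources compare on identifier, everything else on alias; node shapes are simplified to plain dicts here):

def matches(schema, table, node):
    if node['resource_type'] == 'source':
        name = node['identifier']
    else:
        name = node['alias']
    return (node['schema'].lower() == schema.lower() and
            name.lower() == table.lower())


nodes = {
    'seed.proj.raw_orders': {
        'resource_type': 'seed', 'schema': 'RAW', 'alias': 'orders'},
    'source.proj.raw.orders': {
        'resource_type': 'source', 'schema': 'raw', 'identifier': 'ORDERS'},
}

unique_ids = [uid for uid, node in nodes.items()
              if matches('raw', 'orders', node)]
print(unique_ids)  # both the seed and the source match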
|
||||
def add_nodes(self, new_nodes):
|
||||
"""Add the given dict of new nodes to the manifest."""
|
||||
@@ -375,29 +392,27 @@ class Manifest(APIObject):
|
||||
'not found or is disabled').format(patch.name)
|
||||
)
|
||||
|
||||
def to_flat_graph(self):
|
||||
"""Convert the parsed manifest to the 'flat graph' that the compiler
|
||||
expects.
|
||||
|
||||
Kind of hacky note: everything in the code is happy to deal with
|
||||
macros as ParsedMacro objects (in fact, it's been changed to require
|
||||
that), so those can just be returned without any work. Nodes sadly
|
||||
require a lot of work on the compiler side.
|
||||
|
||||
Ideally in the future we won't need to have this method.
|
||||
"""
|
||||
return {
|
||||
'nodes': {k: v.to_shallow_dict() for k, v in self.nodes.items()},
|
||||
'macros': self.macros,
|
||||
}
|
||||
|
||||
def __getattr__(self, name):
|
||||
raise AttributeError("'{}' object has no attribute '{}'".format(
|
||||
type(self).__name__, name)
|
||||
)
|
||||
|
||||
def get_used_schemas(self):
|
||||
def get_used_schemas(self, resource_types=None):
|
||||
return frozenset({
|
||||
node.schema
|
||||
(node.database, node.schema)
|
||||
for node in self.nodes.values()
|
||||
if not resource_types or node.resource_type in resource_types
|
||||
})
|
||||
|
||||
def get_used_databases(self):
|
||||
return frozenset(node.database for node in self.nodes.values())
|
||||
|
||||
def deepcopy(self, config=None):
|
||||
return Manifest(
|
||||
nodes={k: v.incorporate() for k, v in self.nodes.items()},
|
||||
macros={k: v.incorporate() for k, v in self.macros.items()},
|
||||
docs={k: v.incorporate() for k, v in self.docs.items()},
|
||||
generated_at=self.generated_at,
|
||||
disabled=[n.incorporate() for n in self.disabled],
|
||||
config=config
|
||||
)
|
||||
@@ -1,13 +1,12 @@
|
||||
from dbt.api import APIObject
|
||||
from dbt.utils import deep_merge
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.exceptions import raise_duplicate_resource_name, \
|
||||
raise_patch_targets_not_found
|
||||
|
||||
import dbt.clients.jinja
|
||||
|
||||
from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT, \
|
||||
UNPARSED_MACRO_CONTRACT, UNPARSED_DOCUMENTATION_FILE_CONTRACT
|
||||
UNPARSED_MACRO_CONTRACT, UNPARSED_DOCUMENTATION_FILE_CONTRACT, \
|
||||
UNPARSED_BASE_CONTRACT, TIME_CONTRACT
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
|
||||
@@ -48,6 +47,10 @@ CONFIG_CONTRACT = {
|
||||
'materialized': {
|
||||
'type': 'string',
|
||||
},
|
||||
'persist_docs': {
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
},
|
||||
'post-hook': {
|
||||
'type': 'array',
|
||||
'items': HOOK_CONTRACT,
|
||||
@@ -81,10 +84,14 @@ CONFIG_CONTRACT = {
|
||||
}
|
||||
]
|
||||
},
|
||||
'severity': {
|
||||
'type': 'string',
|
||||
'pattern': '([eE][rR][rR][oO][rR]|[wW][aA][rR][nN])',
|
||||
},
|
||||
},
|
||||
'required': [
|
||||
'enabled', 'materialized', 'post-hook', 'pre-hook', 'vars',
|
||||
'quoting', 'column_types', 'tags'
|
||||
'quoting', 'column_types', 'tags', 'persist_docs'
|
||||
]
|
||||
}
|
||||
|
||||
@@ -137,126 +144,204 @@ DOCREF_CONTRACT = {
|
||||
}
|
||||
|
||||
|
||||
HAS_FQN_CONTRACT = {
|
||||
'properties': {
|
||||
'fqn': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
}
|
||||
},
|
||||
},
|
||||
'required': ['fqn'],
|
||||
}
|
||||
|
||||
|
||||
HAS_UNIQUE_ID_CONTRACT = {
|
||||
'properties': {
|
||||
'unique_id': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
},
|
||||
},
|
||||
'required': ['unique_id'],
|
||||
}
|
||||
|
||||
CAN_REF_CONTRACT = {
|
||||
'properties': {
|
||||
'refs': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'array',
|
||||
'description': (
|
||||
'The list of arguments passed to a single ref call.'
|
||||
),
|
||||
},
|
||||
'description': (
|
||||
'The list of call arguments, one list of arguments per '
|
||||
'call.'
|
||||
)
|
||||
},
|
||||
'sources': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'array',
|
||||
'description': (
|
||||
'The list of arguments passed to a single source call.'
|
||||
),
|
||||
},
|
||||
'description': (
|
||||
'The list of call arguments, one list of arguments per '
|
||||
'call.'
|
||||
)
|
||||
},
|
||||
'depends_on': {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'nodes': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
'description': (
|
||||
'A node unique ID that this depends on.'
|
||||
)
|
||||
}
|
||||
},
|
||||
'macros': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
'description': (
|
||||
'A macro unique ID that this depends on.'
|
||||
)
|
||||
}
|
||||
},
|
||||
},
|
||||
'description': (
|
||||
'A list of unique IDs for nodes and macros that this '
|
||||
'node depends upon.'
|
||||
),
|
||||
'required': ['nodes', 'macros'],
|
||||
},
|
||||
},
|
||||
'required': ['refs', 'sources', 'depends_on'],
|
||||
}
|
||||
|
||||
|
||||
HAS_DOCREFS_CONTRACT = {
|
||||
'properties': {
|
||||
'docrefs': {
|
||||
'type': 'array',
|
||||
'items': DOCREF_CONTRACT,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
HAS_DESCRIPTION_CONTRACT = {
|
||||
'properties': {
|
||||
'description': {
|
||||
'type': 'string',
|
||||
'description': 'A user-supplied description of the model',
|
||||
},
|
||||
'columns': {
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'.*': COLUMN_INFO_CONTRACT,
|
||||
},
|
||||
},
|
||||
},
|
||||
'required': ['description', 'columns'],
|
||||
}
|
||||
|
||||
# does this belong inside another contract?
|
||||
HAS_CONFIG_CONTRACT = {
|
||||
'properties': {
|
||||
'config': CONFIG_CONTRACT,
|
||||
},
|
||||
'required': ['config'],
|
||||
}
|
||||
|
||||
|
||||
COLUMN_TEST_CONTRACT = {
|
||||
'properties': {
|
||||
'column_name': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'In tests parsed from a v2 schema, the column the test is '
|
||||
'associated with (if there is one)'
|
||||
)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
HAS_RELATION_METADATA_CONTRACT = {
|
||||
'properties': {
|
||||
'database': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The actual database string that this will build into.'
|
||||
)
|
||||
},
|
||||
'schema': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The actual schema string that this will build into.'
|
||||
)
|
||||
},
|
||||
},
|
||||
'required': ['database', 'schema'],
|
||||
}
|
||||
|
||||
|
||||
PARSED_NODE_CONTRACT = deep_merge(
|
||||
UNPARSED_NODE_CONTRACT,
|
||||
HAS_UNIQUE_ID_CONTRACT,
|
||||
HAS_FQN_CONTRACT,
|
||||
CAN_REF_CONTRACT,
|
||||
HAS_DOCREFS_CONTRACT,
|
||||
HAS_DESCRIPTION_CONTRACT,
|
||||
HAS_CONFIG_CONTRACT,
|
||||
COLUMN_TEST_CONTRACT,
|
||||
HAS_RELATION_METADATA_CONTRACT,
|
||||
{
|
||||
'properties': {
|
||||
'unique_id': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
},
|
||||
'fqn': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
}
|
||||
},
|
||||
'schema': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The actual database string that this will build into.'
|
||||
)
|
||||
},
|
||||
'alias': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The name of the relation that this will build into'
|
||||
)
|
||||
},
|
||||
'refs': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'array',
|
||||
'description': (
|
||||
'The list of arguments passed to a single ref call.'
|
||||
),
|
||||
},
|
||||
'description': (
|
||||
'The list of call arguments, one list of arguments per '
|
||||
'call.'
|
||||
)
|
||||
},
|
||||
'depends_on': {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'nodes': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
'description': (
|
||||
'A node unique ID that this depends on.'
|
||||
)
|
||||
}
|
||||
},
|
||||
'macros': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
'minLength': 1,
|
||||
'description': (
|
||||
'A macro unique ID that this depends on.'
|
||||
)
|
||||
}
|
||||
},
|
||||
},
|
||||
'description': (
|
||||
'A list of unique IDs for nodes and macros that this '
|
||||
'node depends upon.'
|
||||
),
|
||||
'required': ['nodes', 'macros'],
|
||||
},
|
||||
# TODO: move this into a class property.
|
||||
'empty': {
|
||||
'type': 'boolean',
|
||||
'description': 'True if the SQL is empty',
|
||||
},
|
||||
'config': CONFIG_CONTRACT,
|
||||
'tags': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'type': 'string',
|
||||
}
|
||||
},
|
||||
'description': {
|
||||
'type': 'string',
|
||||
'description': 'A user-supplied description of the model',
|
||||
},
|
||||
'columns': {
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'.*': COLUMN_INFO_CONTRACT,
|
||||
}
|
||||
},
|
||||
# this is really nodes-only
|
||||
'patch_path': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The path to the patch source if the node was patched'
|
||||
),
|
||||
},
|
||||
'docrefs': {
|
||||
'type': 'array',
|
||||
'items': DOCREF_CONTRACT,
|
||||
},
|
||||
'build_path': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'In seeds, the path to the source file used during build.'
|
||||
),
|
||||
},
|
||||
'column_name': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'In tests parsed from a v2 schema, the column the test is '
|
||||
'associated with (if there is one)'
|
||||
)
|
||||
},
|
||||
},
|
||||
'required': UNPARSED_NODE_CONTRACT['required'] + [
|
||||
'unique_id', 'fqn', 'schema', 'refs', 'depends_on', 'empty',
|
||||
'config', 'tags', 'alias', 'columns', 'description'
|
||||
]
|
||||
'required': ['empty', 'tags', 'alias'],
|
||||
}
|
||||
)
|
||||
|
||||
@@ -270,6 +355,18 @@ class ParsedNode(APIObject):
|
||||
kwargs.setdefault('description', '')
|
||||
super(ParsedNode, self).__init__(**kwargs)
|
||||
|
||||
@property
|
||||
def is_refable(self):
|
||||
return self.resource_type in NodeType.refable()
|
||||
|
||||
@property
|
||||
def is_ephemeral(self):
|
||||
return self.get('config', {}).get('materialized') == 'ephemeral'
|
||||
|
||||
@property
|
||||
def is_ephemeral_model(self):
|
||||
return self.is_refable and self.is_ephemeral
|
||||
|
||||
@property
|
||||
def depends_on_nodes(self):
|
||||
"""Return the list of node IDs that this node depends on."""
|
||||
@@ -319,6 +416,14 @@ class ParsedNode(APIObject):
|
||||
def build_path(self, value):
|
||||
self._contents['build_path'] = value
|
||||
|
||||
@property
|
||||
def database(self):
|
||||
return self._contents['database']
|
||||
|
||||
@database.setter
|
||||
def database(self, value):
|
||||
self._contents['database'] = value
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
return self._contents['schema']
|
||||
@@ -344,6 +449,79 @@ class ParsedNode(APIObject):
|
||||
self._contents['config'] = value
|
||||
|
||||
|
||||
SNAPSHOT_CONFIG_CONTRACT = {
|
||||
'properties': {
|
||||
'target_database': {
|
||||
'type': 'string',
|
||||
},
|
||||
'target_schema': {
|
||||
'type': 'string',
|
||||
},
|
||||
'unique_key': {
|
||||
'type': 'string',
|
||||
},
|
||||
'anyOf': [
|
||||
{
|
||||
'properties': {
|
||||
'strategy': {
|
||||
'enum': ['timestamp'],
|
||||
},
|
||||
'updated_at': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The column name with the timestamp to compare'
|
||||
),
|
||||
},
|
||||
},
|
||||
'required': ['updated_at'],
|
||||
},
|
||||
{
|
||||
'properties': {
|
||||
'strategy': {
|
||||
'enum': ['check'],
|
||||
},
|
||||
'check_cols': {
|
||||
'oneOf': [
|
||||
{
|
||||
'type': 'array',
|
||||
'items': {'type': 'string'},
|
||||
'description': 'The columns to check',
|
||||
'minLength': 1,
|
||||
},
|
||||
{
|
||||
'enum': ['all'],
|
||||
'description': 'Check all columns',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
'required': ['check_cols'],
|
||||
}
|
||||
]
|
||||
},
|
||||
'required': [
|
||||
'target_schema', 'unique_key', 'strategy',
|
||||
],
|
||||
}
|
||||
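A minimal illustration (not part of the diff) of a config dict of the kind the timestamp branch of SNAPSHOT_CONFIG_CONTRACT above is meant to describe; all values are invented.

example_snapshot_config = {
    'target_database': 'analytics',   # optional per the contract
    'target_schema': 'snapshots',     # required
    'unique_key': 'id',               # required
    'strategy': 'timestamp',          # selects the timestamp branch
    'updated_at': 'updated_at',       # required by the timestamp branch
}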
|
||||
|
||||
PARSED_SNAPSHOT_NODE_CONTRACT = deep_merge(
|
||||
PARSED_NODE_CONTRACT,
|
||||
{
|
||||
'properties': {
|
||||
'config': SNAPSHOT_CONFIG_CONTRACT,
|
||||
'resource_type': {
|
||||
'enum': [NodeType.Snapshot],
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class ParsedSnapshotNode(ParsedNode):
|
||||
SCHEMA = PARSED_SNAPSHOT_NODE_CONTRACT
|
||||
|
||||
|
||||
# The parsed node update is only the 'patch', not the test. The test became a
|
||||
# regular parsed node. Note that description and columns must be present, but
|
||||
# may be empty.
|
||||
@@ -378,8 +556,9 @@ PARSED_NODE_PATCH_CONTRACT = {
|
||||
'items': DOCREF_CONTRACT,
|
||||
}
|
||||
},
|
||||
'required': ['name', 'original_file_path', 'description', 'columns',
|
||||
'docrefs'],
|
||||
'required': [
|
||||
'name', 'original_file_path', 'description', 'columns', 'docrefs'
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -405,7 +584,6 @@ PARSED_MACRO_CONTRACT = deep_merge(
|
||||
'resource_type': {
|
||||
'enum': [
|
||||
NodeType.Macro,
|
||||
NodeType.Operation,
|
||||
],
|
||||
},
|
||||
'unique_id': {
|
||||
@@ -440,7 +618,7 @@ PARSED_MACRO_CONTRACT = deep_merge(
|
||||
'required': ['macros'],
|
||||
},
|
||||
},
|
||||
'required': UNPARSED_MACRO_CONTRACT['required'] + [
|
||||
'required': [
|
||||
'resource_type', 'unique_id', 'tags', 'depends_on', 'name',
|
||||
]
|
||||
}
|
||||
@@ -484,9 +662,7 @@ PARSED_DOCUMENTATION_CONTRACT = deep_merge(
|
||||
'description': 'The contents of just the docs block',
|
||||
},
|
||||
},
|
||||
'required': UNPARSED_DOCUMENTATION_FILE_CONTRACT['required'] + [
|
||||
'name', 'unique_id', 'block_contents',
|
||||
],
|
||||
'required': ['name', 'unique_id', 'block_contents'],
|
||||
}
|
||||
)
|
||||
|
||||
@@ -513,3 +689,127 @@ class ParsedDocumentation(APIObject):
|
||||
|
||||
class Hook(APIObject):
|
||||
SCHEMA = HOOK_CONTRACT
|
||||
|
||||
|
||||
FRESHNESS_CONTRACT = {
|
||||
'properties': {
|
||||
'loaded_at_field': {
|
||||
'type': ['null', 'string'],
|
||||
'description': 'The field to use as the "loaded at" timestamp',
|
||||
},
|
||||
'freshness': {
|
||||
'anyOf': [
|
||||
{'type': 'null'},
|
||||
{
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'warn_after': TIME_CONTRACT,
|
||||
'error_after': TIME_CONTRACT,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
QUOTING_CONTRACT = {
|
||||
'properties': {
|
||||
'quoting': {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'database': {'type': 'boolean'},
|
||||
'schema': {'type': 'boolean'},
|
||||
'identifier': {'type': 'boolean'},
|
||||
},
|
||||
},
|
||||
},
|
||||
'required': ['quoting'],
|
||||
}
|
||||
|
||||
|
||||
PARSED_SOURCE_DEFINITION_CONTRACT = deep_merge(
|
||||
UNPARSED_BASE_CONTRACT,
|
||||
FRESHNESS_CONTRACT,
|
||||
QUOTING_CONTRACT,
|
||||
HAS_DESCRIPTION_CONTRACT,
|
||||
HAS_UNIQUE_ID_CONTRACT,
|
||||
HAS_DOCREFS_CONTRACT,
|
||||
HAS_RELATION_METADATA_CONTRACT,
|
||||
HAS_FQN_CONTRACT,
|
||||
{
|
||||
'description': (
|
||||
'A source table definition, as parsed from the one provided in the'
|
||||
'"tables" subsection of the "sources" section of schema.yml'
|
||||
),
|
||||
'properties': {
|
||||
'name': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The name of this node, which is the name of the model it'
|
||||
'refers to'
|
||||
),
|
||||
'minLength': 1,
|
||||
},
|
||||
'source_name': {
|
||||
'type': 'string',
|
||||
'description': 'The reference name of the source definition',
|
||||
'minLength': 1,
|
||||
},
|
||||
'source_description': {
|
||||
'type': 'string',
|
||||
'description': 'The user-supplied description of the source',
|
||||
},
|
||||
'loader': {
|
||||
'type': 'string',
|
||||
'description': 'The user-defined loader for this source',
|
||||
},
|
||||
'identifier': {
|
||||
'type': 'string',
|
||||
'description': 'The identifier for the source table',
|
||||
'minLength': 1,
|
||||
},
|
||||
# the manifest search stuff really requires this, sadly
|
||||
'resource_type': {
|
||||
'enum': [NodeType.Source],
|
||||
},
|
||||
},
|
||||
# note that while required, loaded_at_field and freshness may be null
|
||||
'required': [
|
||||
'source_name', 'source_description', 'loaded_at_field', 'loader',
|
||||
'freshness', 'description', 'columns', 'docrefs', 'identifier',
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class ParsedSourceDefinition(APIObject):
|
||||
SCHEMA = PARSED_SOURCE_DEFINITION_CONTRACT
|
||||
is_ephemeral_model = False
|
||||
|
||||
def to_shallow_dict(self):
|
||||
return self._contents.copy()
|
||||
|
||||
# provide some emtpy/meaningless properties so these look more like
|
||||
# ParsedNodes
|
||||
@property
|
||||
def depends_on_nodes(self):
|
||||
return []
|
||||
|
||||
@property
|
||||
def refs(self):
|
||||
return []
|
||||
|
||||
@property
|
||||
def sources(self):
|
||||
return []
|
||||
|
||||
@property
|
||||
def tags(self):
|
||||
return []
|
||||
|
||||
@property
|
||||
def has_freshness(self):
|
||||
return bool(self.freshness) and self.loaded_at_field is not None
|
||||
@@ -27,27 +27,36 @@ UNPARSED_BASE_CONTRACT = {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'Relative path to the originating file from the project root.'
|
||||
),
|
||||
},
|
||||
),
|
||||
}
|
||||
},
|
||||
'required': ['package_name', 'root_path', 'path', 'original_file_path']
|
||||
}
|
||||
|
||||
UNPARSED_HAS_SQL_CONTRACT = {
|
||||
'properties': {
|
||||
'raw_sql': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'For nodes defined in SQL files, this is just the contents '
|
||||
'of that file. For schema tests, archives, etc. this is '
|
||||
'of that file. For schema tests, snapshots, etc. this is '
|
||||
'generated by dbt.'),
|
||||
},
|
||||
'index': {
|
||||
'type': 'integer',
|
||||
}
|
||||
},
|
||||
'required': ['package_name', 'root_path', 'path', 'original_file_path',
|
||||
'raw_sql']
|
||||
'required': ['raw_sql']
|
||||
}
|
||||
|
||||
UNPARSED_MACRO_CONTRACT = UNPARSED_BASE_CONTRACT
|
||||
UNPARSED_MACRO_CONTRACT = deep_merge(
|
||||
UNPARSED_BASE_CONTRACT,
|
||||
UNPARSED_HAS_SQL_CONTRACT
|
||||
)
|
||||
|
||||
UNPARSED_NODE_CONTRACT = deep_merge(
|
||||
UNPARSED_BASE_CONTRACT,
|
||||
UNPARSED_HAS_SQL_CONTRACT,
|
||||
{
|
||||
'properties': {
|
||||
'name': {
|
||||
@@ -62,18 +71,15 @@ UNPARSED_NODE_CONTRACT = deep_merge(
|
||||
NodeType.Model,
|
||||
NodeType.Test,
|
||||
NodeType.Analysis,
|
||||
# Note: Hooks fail if you remove this, even though it's
|
||||
# also allowed in ParsedMacro, which seems wrong.
|
||||
# Maybe need to move hook operations into macros?
|
||||
NodeType.Operation,
|
||||
NodeType.Seed,
|
||||
# we need this if parse_node is going to handle archives.
|
||||
NodeType.Archive,
|
||||
# we need this if parse_node is going to handle snapshots.
|
||||
NodeType.Snapshot,
|
||||
NodeType.RPCCall,
|
||||
]
|
||||
},
|
||||
},
|
||||
'required': UNPARSED_BASE_CONTRACT['required'] + [
|
||||
'resource_type', 'name']
|
||||
'required': ['resource_type', 'name']
|
||||
}
|
||||
)
|
||||
|
||||
@@ -115,13 +121,17 @@ COLUMN_TEST_CONTRACT = {
|
||||
}
|
||||
|
||||
|
||||
UNPARSED_NODE_UPDATE_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'A collection of the unparsed node updates, as provided in the '
|
||||
'"models" section of schema.yml'
|
||||
),
|
||||
UNPARSED_COLUMN_DESCRIPTION_CONTRACT = {
|
||||
'properties': {
|
||||
'columns': {
|
||||
'type': 'array',
|
||||
'items': COLUMN_TEST_CONTRACT,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
UNPARSED_NODE_DESCRIPTION_CONTRACT = {
|
||||
'properties': {
|
||||
'name': {
|
||||
'type': 'string',
|
||||
@@ -137,10 +147,6 @@ UNPARSED_NODE_UPDATE_CONTRACT = {
|
||||
'The raw string description of the node after parsing the yaml'
|
||||
),
|
||||
},
|
||||
'columns': {
|
||||
'type': 'array',
|
||||
'items': COLUMN_TEST_CONTRACT,
|
||||
},
|
||||
'tests': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
@@ -155,13 +161,183 @@ UNPARSED_NODE_UPDATE_CONTRACT = {
|
||||
}
|
||||
|
||||
|
||||
UNPARSED_NODE_UPDATE_CONTRACT = deep_merge(
|
||||
UNPARSED_NODE_DESCRIPTION_CONTRACT,
|
||||
UNPARSED_COLUMN_DESCRIPTION_CONTRACT,
|
||||
{
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'A collection of the unparsed node updates, as provided in the '
|
||||
'"models" section of schema.yml'
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class UnparsedNodeUpdate(APIObject):
|
||||
"""An unparsed node update is the blueprint for tests to be added and nodes
|
||||
to be updated, referencing a certain node (specifically, a Model).
|
||||
to be updated, referencing a certain node (specifically, a Model or
|
||||
Source).
|
||||
"""
|
||||
SCHEMA = UNPARSED_NODE_UPDATE_CONTRACT
|
||||
|
||||
|
||||
TIME_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'count': {
|
||||
'type': 'integer',
|
||||
},
|
||||
'period': {
|
||||
'enum': ['minute', 'hour', 'day'],
|
||||
},
|
||||
},
|
||||
'required': ['count', 'period'],
|
||||
}
|
||||
|
||||
|
||||
_FRESHNESS_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'warn_after': {
|
||||
'anyOf': [
|
||||
{'type': 'null'},
|
||||
TIME_CONTRACT,
|
||||
]
|
||||
},
|
||||
'error_after': {
|
||||
'anyOf': [
|
||||
{'type': 'null'},
|
||||
TIME_CONTRACT,
|
||||
|
||||
]
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
_QUOTING_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'database': {'type': 'boolean'},
|
||||
'schema': {'type': 'boolean'},
|
||||
'identifier': {'type': 'boolean'},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
QUOTING_CONTRACT = {
|
||||
'properties': {
|
||||
'quoting': {
|
||||
'anyOf': [
|
||||
{'type': 'null'},
|
||||
_QUOTING_CONTRACT,
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
FRESHNESS_CONTRACT = {
|
||||
'properties': {
|
||||
'loaded_at_field': {
|
||||
'type': ['null', 'string'],
|
||||
'description': 'The field to use as the "loaded at" timestamp',
|
||||
},
|
||||
'freshness': {
|
||||
'anyOf': [
|
||||
{'type': 'null'},
|
||||
_FRESHNESS_CONTRACT,
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
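A sketch (not from the diff) of a value that FRESHNESS_CONTRACT above describes, i.e. a parsed schema.yml freshness block; the field values are invented.

example_freshness = {
    'loaded_at_field': '_etl_loaded_at',
    'freshness': {
        'warn_after': {'count': 12, 'period': 'hour'},
        'error_after': {'count': 1, 'period': 'day'},
    },
}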
|
||||
|
||||
UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT = deep_merge(
|
||||
UNPARSED_NODE_DESCRIPTION_CONTRACT,
|
||||
UNPARSED_COLUMN_DESCRIPTION_CONTRACT,
|
||||
FRESHNESS_CONTRACT,
|
||||
QUOTING_CONTRACT,
|
||||
{
|
||||
'description': (
|
||||
'A source table definition, as provided in the "tables" '
|
||||
'subsection of the "sources" section of schema.yml'
|
||||
),
|
||||
'properties': {
|
||||
'identifier': {
|
||||
'type': 'string',
|
||||
'description': 'The identifier for the source table',
|
||||
'minLength': 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
UNPARSED_SOURCE_DEFINITION_CONTRACT = deep_merge(
|
||||
FRESHNESS_CONTRACT,
|
||||
QUOTING_CONTRACT,
|
||||
{
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'A collection of the unparsed sources, as provided in the '
|
||||
'"sources" section of schema.yml'
|
||||
),
|
||||
'properties': {
|
||||
'name': {
|
||||
'type': 'string',
|
||||
'description': 'The reference name of the source definition',
|
||||
'minLength': 1,
|
||||
},
|
||||
'loader': {
|
||||
'type': 'string',
|
||||
'description': 'The user-defined loader for this source',
|
||||
'minLength': 1,
|
||||
},
|
||||
'description': {
|
||||
'type': 'string',
|
||||
'description': 'The user-supplied description of the source',
|
||||
},
|
||||
'database': {
|
||||
'type': 'string',
|
||||
'description': 'The database name for the source table',
|
||||
'minLength': 1,
|
||||
},
|
||||
'schema': {
|
||||
'type': 'string',
|
||||
'description': 'The schema name for the source table',
|
||||
'minLength': 1,
|
||||
},
|
||||
'tables': {
|
||||
'type': 'array',
|
||||
'items': UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT,
|
||||
'description': 'The tables for this source',
|
||||
'minLength': 1,
|
||||
},
|
||||
},
|
||||
'required': ['name'],
|
||||
}
|
||||
)
|
||||
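An illustrative (invented) source entry of the shape UNPARSED_SOURCE_DEFINITION_CONTRACT above accepts; only 'name' is required, the rest is optional.

example_source = {
    'name': 'raw_payments',
    'loader': 'fivetran',
    'schema': 'raw',
    'tables': [
        {'name': 'orders', 'identifier': 'orders_v2'},
    ],
}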
|
||||
|
||||
class UnparsedTableDefinition(APIObject):
|
||||
SCHEMA = UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT
|
||||
|
||||
|
||||
class UnparsedSourceDefinition(APIObject):
|
||||
SCHEMA = UNPARSED_SOURCE_DEFINITION_CONTRACT
|
||||
|
||||
@property
|
||||
def tables(self):
|
||||
return [UnparsedTableDefinition(**t) for t in self.get('tables', [])]
|
||||
|
||||
|
||||
UNPARSED_DOCUMENTATION_FILE_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -185,7 +361,7 @@ UNPARSED_DOCUMENTATION_FILE_CONTRACT = {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'Relative path to the originating file from the project root.'
|
||||
),
|
||||
),
|
||||
},
|
||||
'file_contents': {
|
||||
'type': 'string',
|
||||
@@ -1,11 +1,8 @@
|
||||
from dbt.api.object import APIObject
|
||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||
from dbt.utils import deep_merge
|
||||
from dbt.contracts.connection import POSTGRES_CREDENTIALS_CONTRACT, \
|
||||
REDSHIFT_CREDENTIALS_CONTRACT, SNOWFLAKE_CREDENTIALS_CONTRACT, \
|
||||
BIGQUERY_CREDENTIALS_CONTRACT
|
||||
|
||||
# TODO: add description fields.
|
||||
|
||||
ARCHIVE_TABLE_CONFIG_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -23,11 +20,13 @@ ARCHIVE_CONFIG_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'source_database': {'type': 'string'},
|
||||
'target_database': {'type': 'string'},
|
||||
'source_schema': {'type': 'string'},
|
||||
'target_schema': {'type': 'string'},
|
||||
'tables': {
|
||||
'type': 'array',
|
||||
'item': ARCHIVE_TABLE_CONFIG_CONTRACT,
|
||||
'items': ARCHIVE_TABLE_CONFIG_CONTRACT,
|
||||
}
|
||||
},
|
||||
'required': ['source_schema', 'target_schema', 'tables'],
|
||||
@@ -92,6 +91,10 @@ PROJECT_CONTRACT = {
|
||||
'target-path': {
|
||||
'type': 'string',
|
||||
},
|
||||
'snapshot-paths': {
|
||||
'type': 'array',
|
||||
'items': {'type': 'string'},
|
||||
},
|
||||
'clean-targets': {
|
||||
'type': 'array',
|
||||
'items': {'type': 'string'},
|
||||
@@ -143,6 +146,11 @@ PROJECT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
},
|
||||
# we validate the regex separately, using the pattern in dbt.semver
|
||||
'require-dbt-version': {
|
||||
'type': ['string', 'array'],
|
||||
'items': {'type': 'string'},
|
||||
},
|
||||
},
|
||||
'required': ['name', 'version'],
|
||||
}
|
||||
@@ -177,14 +185,45 @@ GIT_PACKAGE_CONTRACT = {
|
||||
},
|
||||
'revision': {
|
||||
'type': ['string', 'array'],
|
||||
'item': 'string',
|
||||
'items': {'type': 'string'},
|
||||
'description': 'The git revision to use, if it is not tip',
|
||||
},
|
||||
'warn-unpinned': {
|
||||
'type': 'boolean',
|
||||
}
|
||||
},
|
||||
'required': ['git'],
|
||||
}
|
||||
|
||||
|
||||
VERSION_SPECIFICATION_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'major': {
|
||||
'type': ['string', 'null'],
|
||||
},
|
||||
'minor': {
|
||||
'type': ['string', 'null'],
|
||||
},
|
||||
'patch': {
|
||||
'type': ['string', 'null'],
|
||||
},
|
||||
'prerelease': {
|
||||
'type': ['string', 'null'],
|
||||
},
|
||||
'build': {
|
||||
'type': ['string', 'null'],
|
||||
},
|
||||
'matcher': {
|
||||
'type': 'string',
|
||||
'enum': ['=', '>=', '<=', '>', '<'],
|
||||
},
|
||||
},
|
||||
'required': ['major', 'minor', 'patch', 'prerelease', 'build', 'matcher'],
|
||||
}
|
||||
|
||||
|
||||
REGISTRY_PACKAGE_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -194,30 +233,20 @@ REGISTRY_PACKAGE_CONTRACT = {
|
||||
'description': 'The name of the package',
|
||||
},
|
||||
'version': {
|
||||
'type': 'string',
|
||||
'type': ['string', 'array'],
|
||||
'items': {
|
||||
'anyOf': [
|
||||
VERSION_SPECIFICATION_CONTRACT,
|
||||
{'type': 'string'}
|
||||
],
|
||||
},
|
||||
'description': 'The version of the package',
|
||||
},
|
||||
},
|
||||
'required': ['package'],
|
||||
'required': ['package', 'version'],
|
||||
}
|
||||
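An illustrative registry package entry of the shape REGISTRY_PACKAGE_CONTRACT now accepts: after this change 'version' may be a single string or an array of version strings or parsed specifiers. The package name and ranges here are examples only.

example_registry_package = {
    'package': 'fishtown-analytics/dbt_utils',
    'version': ['>=0.2.0', '<0.3.0'],
}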
|
||||
|
||||
class Package(APIObject):
|
||||
SCHEMA = NotImplemented
|
||||
|
||||
|
||||
class LocalPackage(Package):
|
||||
SCHEMA = LOCAL_PACKAGE_CONTRACT
|
||||
|
||||
|
||||
class GitPackage(Package):
|
||||
SCHEMA = GIT_PACKAGE_CONTRACT
|
||||
|
||||
|
||||
class RegistryPackage(Package):
|
||||
SCHEMA = REGISTRY_PACKAGE_CONTRACT
|
||||
|
||||
|
||||
PACKAGE_FILE_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -237,10 +266,49 @@ PACKAGE_FILE_CONTRACT = {
|
||||
}
|
||||
|
||||
|
||||
# the metadata from the registry has extra things that we don't care about.
|
||||
REGISTRY_PACKAGE_METADATA_CONTRACT = deep_merge(
|
||||
PACKAGE_FILE_CONTRACT,
|
||||
{
|
||||
'additionalProperties': True,
|
||||
'properties': {
|
||||
'name': {
|
||||
'type': 'string',
|
||||
},
|
||||
'downloads': {
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
'properties': {
|
||||
'tarball': {
|
||||
'type': 'string',
|
||||
},
|
||||
},
|
||||
'required': ['tarball']
|
||||
},
|
||||
},
|
||||
'required': PACKAGE_FILE_CONTRACT['required'][:] + ['downloads']
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class PackageConfig(APIObject):
|
||||
SCHEMA = PACKAGE_FILE_CONTRACT
|
||||
|
||||
|
||||
USER_CONFIG_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
'properties': {
|
||||
'send_anonymous_usage_stats': {
|
||||
'type': 'boolean',
|
||||
},
|
||||
'use_colors': {
|
||||
'type': 'boolean',
|
||||
},
|
||||
},
|
||||
}
|
||||
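A small illustration of the user 'config' block that USER_CONFIG_CONTRACT above validates (values invented).

example_user_config = {
    'send_anonymous_usage_stats': False,
    'use_colors': True,
}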
|
||||
|
||||
PROFILE_INFO_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
@@ -251,27 +319,17 @@ PROFILE_INFO_CONTRACT = {
|
||||
'target_name': {
|
||||
'type': 'string',
|
||||
},
|
||||
'send_anonymous_usage_stats': {
|
||||
'type': 'boolean',
|
||||
},
|
||||
'use_colors': {
|
||||
'type': 'boolean',
|
||||
},
|
||||
'config': USER_CONFIG_CONTRACT,
|
||||
'threads': {
|
||||
'type': 'number',
|
||||
},
|
||||
'credentials': {
|
||||
'anyOf': [
|
||||
POSTGRES_CREDENTIALS_CONTRACT,
|
||||
REDSHIFT_CREDENTIALS_CONTRACT,
|
||||
SNOWFLAKE_CREDENTIALS_CONTRACT,
|
||||
BIGQUERY_CREDENTIALS_CONTRACT,
|
||||
],
|
||||
'type': 'object',
|
||||
'additionalProperties': True,
|
||||
},
|
||||
},
|
||||
'required': [
|
||||
'profile_name', 'target_name', 'send_anonymous_usage_stats',
|
||||
'use_colors', 'threads', 'credentials'
|
||||
'profile_name', 'target_name', 'config', 'threads', 'credentials'
|
||||
],
|
||||
}
|
||||
|
||||
@@ -313,6 +371,15 @@ CONFIG_CONTRACT = deep_merge(
|
||||
)
|
||||
|
||||
|
||||
def update_config_contract(typename, connection):
|
||||
PROFILE_INFO_CONTRACT['properties']['credentials']['anyOf'].append(
|
||||
connection.SCHEMA
|
||||
)
|
||||
CONFIG_CONTRACT['properties']['credentials']['anyOf'].append(
|
||||
connection.SCHEMA
|
||||
)
|
||||
|
||||
|
||||
class Configuration(APIObject):
|
||||
SCHEMA = CONFIG_CONTRACT
|
||||
|
||||
core/dbt/contracts/results.py (new file, 544 lines)
@@ -0,0 +1,544 @@
|
||||
from dbt.api.object import APIObject
|
||||
from dbt.utils import deep_merge, timestring
|
||||
from dbt.contracts.common import named_property
|
||||
from dbt.contracts.graph.manifest import COMPILE_RESULT_NODE_CONTRACT
|
||||
from dbt.contracts.graph.unparsed import TIME_CONTRACT
|
||||
from dbt.contracts.graph.parsed import PARSED_SOURCE_DEFINITION_CONTRACT
|
||||
|
||||
|
||||
TIMING_INFO_CONTRACT = {
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'name': {
|
||||
'type': 'string',
|
||||
},
|
||||
'started_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
'completed_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class TimingInfo(APIObject):
|
||||
|
||||
SCHEMA = TIMING_INFO_CONTRACT
|
||||
|
||||
@classmethod
|
||||
def create(cls, name):
|
||||
return cls(name=name)
|
||||
|
||||
def begin(self):
|
||||
self.set('started_at', timestring())
|
||||
|
||||
def end(self):
|
||||
self.set('completed_at', timestring())
|
||||
|
||||
|
||||
class collect_timing_info:
|
||||
def __init__(self, name):
|
||||
self.timing_info = TimingInfo.create(name)
|
||||
|
||||
def __enter__(self):
|
||||
self.timing_info.begin()
|
||||
return self.timing_info
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.timing_info.end()
|
||||
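A hedged usage sketch for the collect_timing_info context manager above, to be run alongside this module; the step name 'compile' is invented.

with collect_timing_info('compile') as timing:
    pass  # the work to be timed goes here

# after the block the TimingInfo carries name, started_at and completed_at
print(timing.serialize())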
|
||||
|
||||
class NodeSerializable(APIObject):
|
||||
|
||||
def serialize(self):
|
||||
result = super(NodeSerializable, self).serialize()
|
||||
result['node'] = self.node.serialize()
|
||||
return result
|
||||
|
||||
|
||||
PARTIAL_RESULT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': 'The partial result of a single node being run',
|
||||
'properties': {
|
||||
'error': {
|
||||
'type': ['string', 'null'],
|
||||
'description': 'The error string, or None if there was no error',
|
||||
},
|
||||
'status': {
|
||||
'type': ['string', 'null', 'number', 'boolean'],
|
||||
'description': 'The status result of the node execution',
|
||||
},
|
||||
'execution_time': {
|
||||
'type': 'number',
|
||||
'description': 'The execution time, in seconds',
|
||||
},
|
||||
'thread_id': {
|
||||
'type': ['string', 'null'],
|
||||
'description': 'ID of the executing thread, e.g. Thread-3',
|
||||
},
|
||||
'timing': {
|
||||
'type': 'array',
|
||||
'items': TIMING_INFO_CONTRACT,
|
||||
},
|
||||
'node': COMPILE_RESULT_NODE_CONTRACT,
|
||||
},
|
||||
'required': ['node', 'status', 'error', 'execution_time', 'thread_id',
|
||||
'timing'],
|
||||
}
|
||||
|
||||
|
||||
class PartialResult(NodeSerializable):
|
||||
"""Represent a "partial" execution result, i.e. one that has not (fully)
|
||||
been executed.
|
||||
|
||||
This may be an ephemeral node (they are not compiled) or any error.
|
||||
"""
|
||||
SCHEMA = PARTIAL_RESULT_CONTRACT
|
||||
|
||||
def __init__(self, node, error=None, status=None, execution_time=0,
|
||||
thread_id=None, timing=None):
|
||||
if timing is None:
|
||||
timing = []
|
||||
super(PartialResult, self).__init__(
|
||||
node=node,
|
||||
error=error,
|
||||
status=status,
|
||||
execution_time=execution_time,
|
||||
thread_id=thread_id,
|
||||
timing=timing,
|
||||
)
|
||||
|
||||
# if the result got to the point where it could be skipped/failed, we would
|
||||
# be returning a real result, not a partial.
|
||||
@property
|
||||
def skipped(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def failed(self):
|
||||
return None
|
||||
|
||||
|
||||
RUN_MODEL_RESULT_CONTRACT = deep_merge(PARTIAL_RESULT_CONTRACT, {
|
||||
'description': 'The result of a single node being run',
|
||||
'properties': {
|
||||
'skip': {
|
||||
'type': 'boolean',
|
||||
'description': 'True if this node was skipped',
|
||||
},
|
||||
'warn': {
|
||||
'type': ['boolean', 'null'],
|
||||
'description': 'True if this node succeeded with a warning',
|
||||
},
|
||||
'fail': {
|
||||
'type': ['boolean', 'null'],
|
||||
'description': 'On tests, true if the test failed',
|
||||
},
|
||||
},
|
||||
'required': ['skip', 'fail', 'warn']
|
||||
})
|
||||
|
||||
|
||||
class RunModelResult(NodeSerializable):
|
||||
SCHEMA = RUN_MODEL_RESULT_CONTRACT
|
||||
|
||||
def __init__(self, node, error=None, skip=False, status=None, failed=None,
|
||||
warned=None, thread_id=None, timing=None, execution_time=0):
|
||||
if timing is None:
|
||||
timing = []
|
||||
super(RunModelResult, self).__init__(
|
||||
node=node,
|
||||
error=error,
|
||||
skip=skip,
|
||||
status=status,
|
||||
fail=failed,
|
||||
warn=warned,
|
||||
execution_time=execution_time,
|
||||
thread_id=thread_id,
|
||||
timing=timing,
|
||||
)
|
||||
|
||||
# these all get set after the fact, generally
|
||||
error = named_property('error',
|
||||
'If there was an error, the text of that error')
|
||||
skip = named_property('skip', 'True if the model was skipped')
|
||||
warn = named_property('warn', 'True if this was a test and it warned')
|
||||
fail = named_property('fail', 'True if this was a test and it failed')
|
||||
status = named_property('status', 'The status of the model execution')
|
||||
execution_time = named_property('execution_time',
|
||||
'The time in seconds to execute the model')
|
||||
thread_id = named_property(
|
||||
'thread_id',
|
||||
'ID of the executing thread, e.g. Thread-3'
|
||||
)
|
||||
timing = named_property(
|
||||
'timing',
|
||||
'List of TimingInfo objects'
|
||||
)
|
||||
|
||||
@property
|
||||
def failed(self):
|
||||
return self.fail
|
||||
|
||||
@property
|
||||
def warned(self):
|
||||
return self.warn
|
||||
|
||||
@property
|
||||
def skipped(self):
|
||||
return self.skip
|
||||
|
||||
|
||||
EXECUTION_RESULT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': 'The result of a single dbt invocation',
|
||||
'properties': {
|
||||
'results': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'anyOf': [
|
||||
RUN_MODEL_RESULT_CONTRACT,
|
||||
PARTIAL_RESULT_CONTRACT,
|
||||
]
|
||||
},
|
||||
'description': 'An array of results, one per model',
|
||||
},
|
||||
'generated_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
'description': (
|
||||
'The time at which the execution result was generated'
|
||||
),
|
||||
},
|
||||
'elapsed_time': {
|
||||
'type': 'number',
|
||||
'description': (
|
||||
'The time elapsed from before_run to after_run (hooks are not '
|
||||
'included)'
|
||||
),
|
||||
}
|
||||
},
|
||||
'required': ['results', 'generated_at', 'elapsed_time'],
|
||||
}
|
||||
|
||||
|
||||
class ExecutionResult(APIObject):
|
||||
SCHEMA = EXECUTION_RESULT_CONTRACT
|
||||
|
||||
def serialize(self):
|
||||
return {
|
||||
'results': [r.serialize() for r in self.results],
|
||||
'generated_at': self.generated_at,
|
||||
'elapsed_time': self.elapsed_time,
|
||||
}
|
||||
|
||||
|
||||
SOURCE_FRESHNESS_RESULT_CONTRACT = deep_merge(PARTIAL_RESULT_CONTRACT, {
|
||||
'properties': {
|
||||
'max_loaded_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
'snapshotted_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
'age': {
|
||||
'type': 'number',
|
||||
},
|
||||
'status': {
|
||||
'enum': ['pass', 'warn', 'error']
|
||||
},
|
||||
'node': PARSED_SOURCE_DEFINITION_CONTRACT,
|
||||
},
|
||||
'required': ['max_loaded_at', 'snapshotted_at', 'age']
|
||||
})
|
||||
|
||||
|
||||
class SourceFreshnessResult(NodeSerializable):
|
||||
SCHEMA = SOURCE_FRESHNESS_RESULT_CONTRACT
|
||||
|
||||
def __init__(self, node, max_loaded_at, snapshotted_at,
|
||||
age, status, thread_id, error=None,
|
||||
timing=None, execution_time=0):
|
||||
max_loaded_at = max_loaded_at.isoformat()
|
||||
snapshotted_at = snapshotted_at.isoformat()
|
||||
if timing is None:
|
||||
timing = []
|
||||
super(SourceFreshnessResult, self).__init__(
|
||||
node=node,
|
||||
max_loaded_at=max_loaded_at,
|
||||
snapshotted_at=snapshotted_at,
|
||||
age=age,
|
||||
status=status,
|
||||
thread_id=thread_id,
|
||||
error=error,
|
||||
timing=timing,
|
||||
execution_time=execution_time
|
||||
)
|
||||
|
||||
@property
|
||||
def failed(self):
|
||||
return self.status == 'error'
|
||||
|
||||
@property
|
||||
def warned(self):
|
||||
return self.status == 'warn'
|
||||
|
||||
@property
|
||||
def skipped(self):
|
||||
return False
|
||||
|
||||
|
||||
FRESHNESS_METADATA_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'generated_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
'description': (
|
||||
'The time at which the execution result was generated'
|
||||
),
|
||||
},
|
||||
'elapsed_time': {
|
||||
'type': 'number',
|
||||
'description': (
|
||||
'The time elapsed from before_run to after_run (hooks '
|
||||
'are not included)'
|
||||
),
|
||||
},
|
||||
},
|
||||
'required': ['generated_at', 'elapsed_time']
|
||||
}
|
||||
|
||||
|
||||
FRESHNESS_RESULTS_CONTRACT = deep_merge(FRESHNESS_METADATA_CONTRACT, {
|
||||
'description': 'The result of a single dbt source freshness invocation',
|
||||
'properties': {
|
||||
'results': {
|
||||
'type': 'array',
|
||||
'items': {
|
||||
'anyOf': [
|
||||
PARTIAL_RESULT_CONTRACT,
|
||||
SOURCE_FRESHNESS_RESULT_CONTRACT,
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
'required': ['results'],
|
||||
})
|
||||
|
||||
|
||||
class FreshnessExecutionResult(APIObject):
|
||||
SCHEMA = FRESHNESS_RESULTS_CONTRACT
|
||||
|
||||
def __init__(self, elapsed_time, generated_at, results):
|
||||
super(FreshnessExecutionResult, self).__init__(
|
||||
elapsed_time=elapsed_time,
|
||||
generated_at=generated_at,
|
||||
results=results
|
||||
)
|
||||
|
||||
def serialize(self):
|
||||
return {
|
||||
'generated_at': self.generated_at,
|
||||
'elapsed_time': self.elapsed_time,
|
||||
'results': [s.serialize() for s in self.results]
|
||||
}
|
||||
|
||||
def write(self, path):
|
||||
"""Create a new object with the desired output schema and write it."""
|
||||
meta = {
|
||||
'generated_at': self.generated_at,
|
||||
'elapsed_time': self.elapsed_time,
|
||||
}
|
||||
sources = {}
|
||||
for result in self.results:
|
||||
unique_id = result.node.unique_id
|
||||
if result.error is not None:
|
||||
result_dict = {
|
||||
'error': result.error,
|
||||
'state': 'runtime error'
|
||||
}
|
||||
else:
|
||||
result_dict = {
|
||||
'max_loaded_at': result.max_loaded_at,
|
||||
'snapshotted_at': result.snapshotted_at,
|
||||
'max_loaded_at_time_ago_in_s': result.age,
|
||||
'state': result.status,
|
||||
'criteria': result.node.freshness,
|
||||
}
|
||||
sources[unique_id] = result_dict
|
||||
output = FreshnessRunOutput(meta=meta, sources=sources)
|
||||
output.write(path)
|
||||
|
||||
|
||||
def _copykeys(src, keys, **updates):
|
||||
return {k: getattr(src, k) for k in keys}
|
||||
|
||||
|
||||
SOURCE_FRESHNESS_OUTPUT_ERROR_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'The source freshness output for a single source table',
|
||||
),
|
||||
'properties': {
|
||||
'error': {
|
||||
'type': 'string',
|
||||
'description': 'The error string',
|
||||
},
|
||||
'state': {
|
||||
'enum': ['runtime error'],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
SOURCE_FRESHNESS_OUTPUT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'The source freshness output for a single source table',
|
||||
),
|
||||
'properties': {
|
||||
'max_loaded_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
'snapshotted_at': {
|
||||
'type': 'string',
|
||||
'format': 'date-time',
|
||||
},
|
||||
'max_loaded_at_time_ago_in_s': {
|
||||
'type': 'number',
|
||||
},
|
||||
'state': {
|
||||
'enum': ['pass', 'warn', 'error']
|
||||
},
|
||||
'criteria': {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'warn_after': TIME_CONTRACT,
|
||||
'error_after': TIME_CONTRACT,
|
||||
},
|
||||
},
|
||||
'required': ['state', 'criteria', 'max_loaded_at', 'snapshotted_at',
|
||||
'max_loaded_at_time_ago_in_s']
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
FRESHNESS_RUN_OUTPUT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': 'The output contract for dbt source freshness invocations',
|
||||
'properties': {
|
||||
'meta': FRESHNESS_METADATA_CONTRACT,
|
||||
'sources': {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'description': (
|
||||
'A collection of the source results, stored by their unique '
|
||||
'IDs.'
|
||||
),
|
||||
'patternProperties': {
|
||||
'.*': {
|
||||
'anyOf': [
|
||||
SOURCE_FRESHNESS_OUTPUT_ERROR_CONTRACT,
|
||||
SOURCE_FRESHNESS_OUTPUT_CONTRACT
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class FreshnessRunOutput(APIObject):
|
||||
SCHEMA = FRESHNESS_RUN_OUTPUT_CONTRACT
|
||||
|
||||
def __init__(self, meta, sources):
|
||||
super(FreshnessRunOutput, self).__init__(meta=meta, sources=sources)
|
||||
|
||||
|
||||
REMOTE_COMPILE_RESULT_CONTRACT = {
|
||||
'type': 'object',
|
||||
'additionalProperties': False,
|
||||
'properties': {
|
||||
'raw_sql': {
|
||||
'type': 'string',
|
||||
},
|
||||
'compiled_sql': {
|
||||
'type': 'string',
|
||||
},
|
||||
'timing': {
|
||||
'type': 'array',
|
||||
'items': TIMING_INFO_CONTRACT,
|
||||
},
|
||||
},
|
||||
'required': ['raw_sql', 'compiled_sql', 'timing']
|
||||
}
|
||||
|
||||
|
||||
class RemoteCompileResult(APIObject):
|
||||
SCHEMA = REMOTE_COMPILE_RESULT_CONTRACT
|
||||
|
||||
def __init__(self, raw_sql, compiled_sql, node, timing=None, **kwargs):
|
||||
if timing is None:
|
||||
timing = []
|
||||
# this should not show up in the serialized output.
|
||||
self.node = node
|
||||
super(RemoteCompileResult, self).__init__(
|
||||
raw_sql=raw_sql,
|
||||
compiled_sql=compiled_sql,
|
||||
timing=timing,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@property
|
||||
def error(self):
|
||||
return None
|
||||
|
||||
|
||||
REMOTE_RUN_RESULT_CONTRACT = deep_merge(REMOTE_COMPILE_RESULT_CONTRACT, {
|
||||
'properties': {
|
||||
'table': {
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'column_names': {
|
||||
'type': 'array',
|
||||
'items': {'type': 'string'},
|
||||
},
|
||||
'rows': {
|
||||
'type': 'array',
|
||||
# any item type is ok
|
||||
},
|
||||
},
|
||||
'required': ['rows', 'column_names'],
|
||||
},
|
||||
},
|
||||
'required': ['table'],
|
||||
})
|
||||
|
||||
|
||||
class RemoteRunResult(RemoteCompileResult):
|
||||
SCHEMA = REMOTE_RUN_RESULT_CONTRACT
|
||||
|
||||
def __init__(self, raw_sql, compiled_sql, node, timing=None, table=None):
|
||||
if table is None:
|
||||
table = []
|
||||
super(RemoteRunResult, self).__init__(
|
||||
raw_sql=raw_sql,
|
||||
compiled_sql=compiled_sql,
|
||||
timing=timing,
|
||||
table=table,
|
||||
node=node
|
||||
)
|
||||
core/dbt/deprecations.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import dbt.links
import dbt.exceptions
import dbt.flags


class DBTDeprecation(object):
    name = None
    description = None

    def show(self, *args, **kwargs):
        if self.name not in active_deprecations:
            desc = self.description.format(**kwargs)
            dbt.exceptions.warn_or_error(
                "* Deprecation Warning: {}\n".format(desc)
            )
            active_deprecations.add(self.name)


class DBTRepositoriesDeprecation(DBTDeprecation):
    name = "repositories"
    description = """The dbt_project.yml configuration option 'repositories' is
    deprecated. Please place dependencies in the `packages.yml` file instead.
    The 'repositories' option will be removed in a future version of dbt.

    For more information, see: https://docs.getdbt.com/docs/package-management

    # Example packages.yml contents:

{recommendation}
"""


class GenerateSchemaNameSingleArgDeprecated(DBTDeprecation):
    name = 'generate-schema-name-single-arg'
    description = '''As of dbt v0.14.0, the `generate_schema_name` macro
    accepts a second "node" argument. The one-argument form of `generate_schema_name`
    is deprecated, and will become unsupported in a future release.

    For more information, see:
    https://docs.getdbt.com/v0.14/docs/upgrading-to-014
    '''  # noqa


class ArchiveDeprecated(DBTDeprecation):
    name = 'archives'
    description = '''As of dbt v0.14.0, the `dbt archive` command is renamed to
    `dbt snapshot` and "archives" are "snapshots". The `dbt archive` command will
    be removed in a future release.

    For more information, see:
    https://docs.getdbt.com/v0.14/docs/upgrading-to-014
    '''


_adapter_renamed_description = """\
The adapter function `adapter.{old_name}` is deprecated and will be removed in
a future release of dbt. Please use `adapter.{new_name}` instead.
Documentation for {new_name} can be found here:
https://docs.getdbt.com/docs/adapter"""


def renamed_method(old_name, new_name):
    class AdapterDeprecationWarning(DBTDeprecation):
        name = 'adapter:{}'.format(old_name)
        description = _adapter_renamed_description.format(old_name=old_name,
                                                          new_name=new_name)

    dep = AdapterDeprecationWarning()
    deprecations_list.append(dep)
    deprecations[dep.name] = dep


def warn(name, *args, **kwargs):
    if name not in deprecations:
        # this should (hopefully) never happen
        raise RuntimeError(
            "Error showing deprecation warning: {}".format(name)
        )

    deprecations[name].show(*args, **kwargs)


# these are globally available
# since modules are only imported once, active_deprecations is a singleton

active_deprecations = set()

deprecations_list = [
    DBTRepositoriesDeprecation(),
    GenerateSchemaNameSingleArgDeprecated(),
    ArchiveDeprecated(),
]

deprecations = {d.name: d for d in deprecations_list}


def reset_deprecations():
    active_deprecations.clear()
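A hedged usage sketch for the module above: firing a registered deprecation by name. Assumes dbt.flags.WARN_ERROR is unset, so the warning is only logged, and only once per process.

import dbt.deprecations

dbt.deprecations.warn('archives')        # logs the ArchiveDeprecated message
dbt.deprecations.warn('archives')        # no-op: 'archives' is already active
dbt.deprecations.reset_deprecations()    # clears the seen-set, e.g. between tests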
@@ -1,13 +1,25 @@
|
||||
from dbt.compat import basestring
|
||||
import sys
|
||||
import six
|
||||
import functools
|
||||
|
||||
from dbt.compat import builtins
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
import re
|
||||
import dbt.flags
|
||||
|
||||
|
||||
class Exception(BaseException):
|
||||
pass
|
||||
class Exception(builtins.Exception):
|
||||
CODE = -32000
|
||||
MESSAGE = "Server Error"
|
||||
|
||||
def data(self):
|
||||
# if overriding, make sure the result is json-serializable.
|
||||
return {
|
||||
'type': self.__class__.__name__,
|
||||
'message': str(self),
|
||||
}
|
||||
|
||||
|
||||
class MacroReturn(BaseException):
|
||||
class MacroReturn(builtins.BaseException):
|
||||
"""
|
||||
Hack of all hacks
|
||||
"""
|
||||
@@ -21,6 +33,9 @@ class InternalException(Exception):
|
||||
|
||||
|
||||
class RuntimeException(RuntimeError, Exception):
|
||||
CODE = 10001
|
||||
MESSAGE = "Runtime error"
|
||||
|
||||
def __init__(self, msg, node=None):
|
||||
self.stack = []
|
||||
self.node = node
|
||||
@@ -80,8 +95,59 @@ class RuntimeException(RuntimeError, Exception):
|
||||
return lines[0] + "\n" + "\n".join(
|
||||
[" " + line for line in lines[1:]])
|
||||
|
||||
def data(self):
|
||||
result = Exception.data(self)
|
||||
if self.node is None:
|
||||
return result
|
||||
|
||||
result.update({
|
||||
'raw_sql': self.node.get('raw_sql'),
|
||||
'compiled_sql': self.node.get('injected_sql'),
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
class RPCFailureResult(RuntimeException):
|
||||
CODE = 10002
|
||||
MESSAGE = "RPC execution error"
|
||||
|
||||
|
||||
class RPCTimeoutException(RuntimeException):
|
||||
CODE = 10008
|
||||
MESSAGE = 'RPC timeout error'
|
||||
|
||||
def __init__(self, timeout):
|
||||
super(RPCTimeoutException, self).__init__(self.MESSAGE)
|
||||
self.timeout = timeout
|
||||
|
||||
def data(self):
|
||||
result = super(RPCTimeoutException, self).data()
|
||||
result.update({
|
||||
'timeout': self.timeout,
|
||||
'message': 'RPC timed out after {}s'.format(self.timeout),
|
||||
})
|
||||
return result
|
||||
|
||||
|
||||
class RPCKilledException(RuntimeException):
|
||||
CODE = 10009
|
||||
MESSAGE = 'RPC process killed'
|
||||
|
||||
def __init__(self, signum):
|
||||
self.signum = signum
|
||||
self.message = 'RPC process killed by signal {}'.format(self.signum)
|
||||
super(RPCKilledException, self).__init__(self.message)
|
||||
|
||||
def data(self):
|
||||
return {
|
||||
'signum': self.signum,
|
||||
'message': self.message,
|
||||
}
|
||||
|
||||
|
||||
class DatabaseException(RuntimeException):
|
||||
CODE = 10003
|
||||
MESSAGE = "Database Error"
|
||||
|
||||
def process_stack(self):
|
||||
lines = []
|
||||
@@ -98,6 +164,9 @@ class DatabaseException(RuntimeException):
|
||||
|
||||
|
||||
class CompilationException(RuntimeException):
|
||||
CODE = 10004
|
||||
MESSAGE = "Compilation Error"
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return 'Compilation'
|
||||
@@ -108,7 +177,8 @@ class RecursionException(RuntimeException):
|
||||
|
||||
|
||||
class ValidationException(RuntimeException):
|
||||
pass
|
||||
CODE = 10005
|
||||
MESSAGE = "Validation Error"
|
||||
|
||||
|
||||
class JSONValidationException(ValidationException):
|
||||
@@ -116,8 +186,9 @@ class JSONValidationException(ValidationException):
|
||||
self.typename = typename
|
||||
self.errors = errors
|
||||
self.errors_message = ', '.join(errors)
|
||||
msg = ('Invalid arguments passed to "{}" instance: {}'.format(
|
||||
self.typename, self.errors_message))
|
||||
msg = 'Invalid arguments passed to "{}" instance: {}'.format(
|
||||
self.typename, self.errors_message
|
||||
)
|
||||
super(JSONValidationException, self).__init__(msg)
|
||||
|
||||
def __reduce__(self):
|
||||
@@ -125,15 +196,20 @@ class JSONValidationException(ValidationException):
|
||||
return (JSONValidationException, (self.typename, self.errors))
|
||||
|
||||
|
||||
class ParsingException(Exception):
|
||||
class AliasException(ValidationException):
|
||||
pass
|
||||
|
||||
|
||||
class DependencyException(Exception):
|
||||
pass
|
||||
# this can happen due to raise_dependency_error and its callers
|
||||
CODE = 10006
|
||||
MESSAGE = "Dependency Error"
|
||||
|
||||
|
||||
class DbtConfigError(RuntimeException):
|
||||
CODE = 10007
|
||||
MESSAGE = "DBT Configuration Error"
|
||||
|
||||
def __init__(self, message, project=None, result_type='invalid_project'):
|
||||
self.project = project
|
||||
super(DbtConfigError, self).__init__(message)
|
||||
@@ -151,6 +227,8 @@ class DbtProfileError(DbtConfigError):
|
||||
class SemverException(Exception):
|
||||
def __init__(self, msg=None):
|
||||
self.msg = msg
|
||||
if msg is not None:
|
||||
super(SemverException, self).__init__(msg)
|
||||
|
||||
|
||||
class VersionsNotCompatibleException(SemverException):
|
||||
@@ -165,6 +243,45 @@ class FailedToConnectException(DatabaseException):
|
||||
pass
|
||||
|
||||
|
||||
class CommandError(RuntimeException):
|
||||
def __init__(self, cwd, cmd, message='Error running command'):
|
||||
super(CommandError, self).__init__(message)
|
||||
self.cwd = cwd
|
||||
self.cmd = cmd
|
||||
self.args = (cwd, cmd, message)
|
||||
|
||||
def __str__(self):
|
||||
if len(self.cmd) == 0:
|
||||
return '{}: No arguments given'.format(self.msg)
|
||||
return '{}: "{}"'.format(self.msg, self.cmd[0])
|
||||
|
||||
|
||||
class ExecutableError(CommandError):
|
||||
def __init__(self, cwd, cmd, message):
|
||||
super(ExecutableError, self).__init__(cwd, cmd, message)
|
||||
|
||||
|
||||
class WorkingDirectoryError(CommandError):
|
||||
def __init__(self, cwd, cmd, message):
|
||||
super(WorkingDirectoryError, self).__init__(cwd, cmd, message)
|
||||
|
||||
def __str__(self):
|
||||
return '{}: "{}"'.format(self.msg, self.cwd)
|
||||
|
||||
|
||||
class CommandResultError(CommandError):
|
||||
def __init__(self, cwd, cmd, returncode, stdout, stderr,
|
||||
message='Got a non-zero returncode'):
|
||||
super(CommandResultError, self).__init__(cwd, cmd, message)
|
||||
self.returncode = returncode
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
self.args = (cwd, cmd, returncode, stdout, stderr, message)
|
||||
|
||||
def __str__(self):
|
||||
return '{} running: {}'.format(self.msg, self.cmd)
|
||||
|
||||
|
||||
def raise_compiler_error(msg, node=None):
|
||||
raise CompilationException(msg, node)
|
||||
|
||||
@@ -177,18 +294,29 @@ def raise_dependency_error(msg):
|
||||
raise DependencyException(msg)
|
||||
|
||||
|
||||
def invalid_type_error(method_name, arg_name, got_value, expected_type,
|
||||
version='0.13.0'):
|
||||
"""Raise a CompilationException when an adapter method available to macros
|
||||
has changed.
|
||||
"""
|
||||
got_type = type(got_value)
|
||||
msg = ("As of {version}, 'adapter.{method_name}' expects argument "
|
||||
"'{arg_name}' to be of type '{expected_type}', instead got "
|
||||
"{got_value} ({got_type})")
|
||||
raise_compiler_error(msg.format(version=version, method_name=method_name,
|
||||
arg_name=arg_name, expected_type=expected_type,
|
||||
got_value=got_value, got_type=got_type))
|
||||
|
||||
|
||||
def ref_invalid_args(model, args):
|
||||
raise_compiler_error(
|
||||
"ref() takes at most two arguments ({} given)".format(len(args)),
|
||||
model)
|
||||
|
||||
|
||||
def ref_bad_context(model, target_model_name, target_model_package):
|
||||
ref_string = "{{ ref('" + target_model_name + "') }}"
|
||||
|
||||
if target_model_package is not None:
|
||||
ref_string = ("{{ ref('" + target_model_package +
|
||||
"', '" + target_model_name + "') }}")
|
||||
def ref_bad_context(model, args):
|
||||
ref_args = ', '.join("'{}'".format(a) for a in args)
|
||||
ref_string = '{{{{ ref({}) }}}}'.format(ref_args)
|
||||
|
||||
base_error_msg = """dbt was unable to infer all dependencies for the model "{model_name}".
|
||||
This typically happens when ref() is placed within a conditional block.
|
||||
@@ -230,21 +358,61 @@ def doc_target_not_found(model, target_doc_name, target_doc_package):
|
||||
raise_compiler_error(msg, model)
|
||||
|
||||
|
||||
def get_target_not_found_msg(model, target_model_name, target_model_package):
|
||||
def _get_target_failure_msg(model, target_model_name, target_model_package,
|
||||
include_path, reason):
|
||||
target_package_string = ''
|
||||
|
||||
if target_model_package is not None:
|
||||
target_package_string = "in package '{}' ".format(target_model_package)
|
||||
|
||||
return ("Model '{}' depends on model '{}' {}which was not found or is"
|
||||
" disabled".format(model.get('unique_id'),
|
||||
target_model_name,
|
||||
target_package_string))
|
||||
source_path_string = ''
|
||||
if include_path:
|
||||
source_path_string = ' ({})'.format(model.get('original_file_path'))
|
||||
|
||||
return ("{} '{}'{} depends on model '{}' {}which {}"
|
||||
.format(model.get('resource_type').title(),
|
||||
model.get('unique_id'),
|
||||
source_path_string,
|
||||
target_model_name,
|
||||
target_package_string,
|
||||
reason))
|
||||
|
||||
|
||||
def get_target_disabled_msg(model, target_model_name, target_model_package):
|
||||
return _get_target_failure_msg(model, target_model_name,
|
||||
target_model_package, include_path=True,
|
||||
reason='is disabled')
|
||||
|
||||
|
||||
def get_target_not_found_msg(model, target_model_name, target_model_package):
|
||||
return _get_target_failure_msg(model, target_model_name,
|
||||
target_model_package, include_path=True,
|
||||
reason='was not found')
|
||||
|
||||
|
||||
def get_target_not_found_or_disabled_msg(model, target_model_name,
|
||||
target_model_package):
|
||||
return _get_target_failure_msg(model, target_model_name,
|
||||
target_model_package, include_path=False,
|
||||
reason='was not found or is disabled')
|
||||
|
||||
|
||||
def ref_target_not_found(model, target_model_name, target_model_package):
|
||||
msg = get_target_not_found_msg(model, target_model_name,
|
||||
target_model_package)
|
||||
msg = get_target_not_found_or_disabled_msg(model, target_model_name,
|
||||
target_model_package)
|
||||
raise_compiler_error(msg, model)
|
||||
|
||||
|
||||
def source_disabled_message(model, target_name, target_table_name):
|
||||
return ("{} '{}' ({}) depends on source '{}.{}' which was not found"
|
||||
.format(model.get('resource_type').title(),
|
||||
model.get('unique_id'),
|
||||
model.get('original_file_path'),
|
||||
target_name,
|
||||
target_table_name))
|
||||
|
||||
|
||||
def source_target_not_found(model, target_name, target_table_name):
|
||||
msg = source_disabled_message(model, target_name, target_table_name)
|
||||
raise_compiler_error(msg, model)
|
||||
|
||||
|
||||
@@ -422,8 +590,8 @@ def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
|
||||
|
||||
def get_match_string(match):
|
||||
return "{}.{}".format(
|
||||
match.get('metadata', {}).get('schema'),
|
||||
match.get('metadata', {}).get('name'))
|
||||
match.get('metadata', {}).get('schema'),
|
||||
match.get('metadata', {}).get('name'))
|
||||
|
||||
raise_compiler_error(
|
||||
'dbt found two relations in your warehouse with similar database '
|
||||
@@ -461,15 +629,12 @@ def raise_duplicate_patch_name(name, patch_1, patch_2):
|
||||
)
|
||||
|
||||
|
||||
def raise_incorrect_version(path):
|
||||
def raise_invalid_schema_yml_version(path, issue):
|
||||
raise_compiler_error(
|
||||
'The schema file at {} does not contain a valid version specifier. '
|
||||
'dbt assumes that schema.yml files without version specifiers are '
|
||||
'version 1 schemas, but this file looks like a version 2 schema. If '
|
||||
'this is the case, you can fix this error by adding `version: 2` to '
|
||||
'the top of the file.\n\nOtherwise, please consult the documentation '
|
||||
'for more information on schema.yml syntax:\n\n'
|
||||
'https://docs.getdbt.com/v0.11/docs/schemayml-files'.format(path)
|
||||
'The schema file at {} is invalid because {}. Please consult the '
|
||||
'documentation for more information on schema.yml syntax:\n\n'
|
||||
'https://docs.getdbt.com/docs/schemayml-files'
|
||||
.format(path, issue)
|
||||
)
|
||||
|
||||
|
||||
@@ -478,3 +643,76 @@ def raise_unrecognized_credentials_type(typename, supported_types):
|
||||
'Unrecognized credentials type "{}" - supported types are ({})'
|
||||
.format(typename, ', '.join('"{}"'.format(t) for t in supported_types))
|
||||
)
|
||||
|
||||
|
||||
def raise_not_implemented(msg):
|
||||
raise NotImplementedException(
|
||||
"ERROR: {}"
|
||||
.format(msg))
|
||||
|
||||
|
||||
def warn_or_error(msg, node=None, log_fmt=None):
|
||||
if dbt.flags.WARN_ERROR:
|
||||
raise_compiler_error(msg, node)
|
||||
else:
|
||||
if log_fmt is not None:
|
||||
msg = log_fmt.format(msg)
|
||||
logger.warning(msg)
|
||||
|
||||
|
||||
def warn_or_raise(exc, log_fmt=None):
|
||||
if dbt.flags.WARN_ERROR:
|
||||
raise exc
|
||||
else:
|
||||
msg = str(exc)
|
||||
if log_fmt is not None:
|
||||
msg = log_fmt.format(msg)
|
||||
logger.warning(msg)
|
||||
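A short sketch of the warn_or_error behaviour added above: the same call either logs or raises depending on dbt.flags.WARN_ERROR. The message text is invented.

import dbt.flags
import dbt.exceptions

dbt.flags.WARN_ERROR = False
dbt.exceptions.warn_or_error('model X was not found')   # only logs a warning

dbt.flags.WARN_ERROR = True
try:
    dbt.exceptions.warn_or_error('model X was not found')
except dbt.exceptions.CompilationException as exc:
    print(exc.type)   # 'Compilation'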
|
||||
|
||||
# Update this when a new function should be added to the
|
||||
# dbt context's `exceptions` key!
|
||||
CONTEXT_EXPORTS = {
|
||||
fn.__name__: fn
|
||||
for fn in
|
||||
[
|
||||
missing_config,
|
||||
missing_materialization,
|
||||
missing_relation,
|
||||
raise_ambiguous_alias,
|
||||
raise_ambiguous_catalog_match,
|
||||
raise_cache_inconsistent,
|
||||
raise_compiler_error,
|
||||
raise_database_error,
|
||||
raise_dep_not_found,
|
||||
raise_dependency_error,
|
||||
raise_duplicate_patch_name,
|
||||
raise_duplicate_resource_name,
|
||||
raise_invalid_schema_yml_version,
|
||||
raise_not_implemented,
|
||||
relation_wrong_type,
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def wrapper(model):
|
||||
def wrap(func):
|
||||
@functools.wraps(func)
|
||||
def inner(*args, **kwargs):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception:
|
||||
exc_type, exc, exc_tb = sys.exc_info()
|
||||
if hasattr(exc, 'node') and exc.node is None:
|
||||
exc.node = model
|
||||
six.reraise(exc_type, exc, exc_tb)
|
||||
|
||||
return inner
|
||||
return wrap
|
||||
|
||||
|
||||
def wrapped_exports(model):
|
||||
wrap = wrapper(model)
|
||||
return {
|
||||
name: wrap(export) for name, export in CONTEXT_EXPORTS.items()
|
||||
}
|
||||
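A hedged sketch of what wrapped_exports above is for: helpers re-raised through the wrapper get the calling model attached when the exception carried no node. The model dict is a stand-in, and the imports assume this version of the module.

from dbt.exceptions import wrapped_exports, CompilationException

model = {'unique_id': 'model.my_project.customers'}
exports = wrapped_exports(model)
try:
    exports['raise_compiler_error']('something went wrong')
except CompilationException as exc:
    print(exc.node is model)   # True: the wrapper filled in exc.node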
core/dbt/flags.py (new file, 29 lines)
@@ -0,0 +1,29 @@
STRICT_MODE = False
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False


def reset():
    global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER

    STRICT_MODE = False
    FULL_REFRESH = False
    USE_CACHE = True
    WARN_ERROR = False
    TEST_NEW_PARSER = False


def set_from_args(args):
    global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER
    USE_CACHE = getattr(args, 'use_cache', True)

    FULL_REFRESH = getattr(args, 'full_refresh', False)
    STRICT_MODE = getattr(args, 'strict', False)
    WARN_ERROR = (
        STRICT_MODE or
        getattr(args, 'warn_error', False)
    )

    TEST_NEW_PARSER = getattr(args, 'test_new_parser', False)
core/dbt/graph/selector.py (new file, 348 lines)
@@ -0,0 +1,348 @@
|
||||
import networkx as nx
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
|
||||
from dbt.utils import is_enabled, coalesce
|
||||
from dbt.node_types import NodeType
|
||||
import dbt.exceptions
|
||||
|
||||
SELECTOR_PARENTS = '+'
|
||||
SELECTOR_CHILDREN = '+'
|
||||
SELECTOR_GLOB = '*'
|
||||
SELECTOR_CHILDREN_AND_ANCESTORS = '@'
|
||||
SELECTOR_DELIMITER = ':'
|
||||
|
||||
|
||||
class SelectionCriteria(object):
|
||||
def __init__(self, node_spec):
|
||||
self.raw = node_spec
|
||||
self.select_children = False
|
||||
self.select_parents = False
|
||||
self.select_childrens_parents = False
|
||||
self.selector_type = SELECTOR_FILTERS.FQN
|
||||
|
||||
if node_spec.startswith(SELECTOR_CHILDREN_AND_ANCESTORS):
|
||||
self.select_childrens_parents = True
|
||||
node_spec = node_spec[1:]
|
||||
|
||||
if node_spec.startswith(SELECTOR_PARENTS):
|
||||
self.select_parents = True
|
||||
node_spec = node_spec[1:]
|
||||
|
||||
if node_spec.endswith(SELECTOR_CHILDREN):
|
||||
self.select_children = True
|
||||
node_spec = node_spec[:-1]
|
||||
|
||||
if self.select_children and self.select_childrens_parents:
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
'Invalid node spec {} - "@" prefix and "+" suffix are '
|
||||
'incompatible'.format(self.raw)
|
||||
)
|
||||
|
||||
if SELECTOR_DELIMITER in node_spec:
|
||||
selector_parts = node_spec.split(SELECTOR_DELIMITER, 1)
|
||||
self.selector_type, self.selector_value = selector_parts
|
||||
else:
|
||||
self.selector_value = node_spec
|
||||
|
||||
|
||||
class SELECTOR_FILTERS(object):
|
||||
FQN = 'fqn'
|
||||
TAG = 'tag'
|
||||
SOURCE = 'source'
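Illustrative expectations for the spec parsing above, inferred from the constructor rather than taken from an official test:

spec = SelectionCriteria('+tag:nightly+')
assert spec.select_parents and spec.select_children
assert spec.selector_type == SELECTOR_FILTERS.TAG
assert spec.selector_value == 'nightly'

spec = SelectionCriteria('@my_model')
assert spec.select_childrens_parents
assert spec.selector_type == SELECTOR_FILTERS.FQN    # the default
assert spec.selector_value == 'my_model'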
|
||||
|
||||
|
||||
def split_specs(node_specs):
|
||||
specs = set()
|
||||
for spec in node_specs:
|
||||
parts = spec.split(" ")
|
||||
specs.update(parts)
|
||||
|
||||
return specs
|
||||
|
||||
|
||||
def get_package_names(graph):
|
||||
return set([node.split(".")[1] for node in graph.nodes()])
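Node unique ids in the graph look like `model.<package>.<name>`, which is why index 1 is the package name. A tiny illustration with made-up ids:

nodes = ['model.my_project.users', 'source.snowplow.events']
assert set(n.split('.')[1] for n in nodes) == {'my_project', 'snowplow'}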
|
||||
|
||||
|
||||
def is_selected_node(real_node, node_selector):
|
||||
for i, selector_part in enumerate(node_selector):
|
||||
|
||||
is_last = (i == len(node_selector) - 1)
|
||||
|
||||
# if we hit a GLOB, then this node is selected
|
||||
if selector_part == SELECTOR_GLOB:
|
||||
return True
|
||||
|
||||
# match package.node_name or package.dir.node_name
|
||||
elif is_last and selector_part == real_node[-1]:
|
||||
return True
|
||||
|
||||
elif len(real_node) <= i:
|
||||
return False
|
||||
|
||||
elif real_node[i] == selector_part:
|
||||
continue
|
||||
|
||||
else:
|
||||
return False
|
||||
|
||||
# if we get all the way down here, then the node is a match
|
||||
return True
|
||||
|
||||
|
||||
def _node_is_match(qualified_name, package_names, fqn):
|
||||
"""Determine if a qualfied name matches an fqn, given the set of package
|
||||
names in the graph.
|
||||
|
||||
:param List[str] qualified_name: The components of the selector or node
|
||||
name, split on '.'.
|
||||
:param Set[str] package_names: The set of package names in the graph.
|
||||
:param List[str] fqn: The node's fully qualified name in the graph.
|
||||
"""
|
||||
if len(qualified_name) == 1 and fqn[-1] == qualified_name[0]:
|
||||
return True
|
||||
|
||||
if qualified_name[0] in package_names:
|
||||
if is_selected_node(fqn, qualified_name):
|
||||
return True
|
||||
|
||||
for package_name in package_names:
|
||||
local_qualified_node_name = [package_name] + qualified_name
|
||||
if is_selected_node(fqn, local_qualified_node_name):
|
||||
return True
|
||||
|
||||
return False
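A small sketch of how the matching above behaves; the package set and fqns are made up, and this assumes the selector module is importable:

packages = {'my_project', 'snowplow'}

# a bare name matches any node whose fqn ends with it
assert _node_is_match(['users'], packages, ['my_project', 'staging', 'users'])

# a package-qualified path with a glob matches everything under that path
assert _node_is_match(['snowplow', 'base', '*'], packages,
                      ['snowplow', 'base', 'events'])

# paths that diverge from the fqn do not match, even after package prefixing
assert not _node_is_match(['marts', 'orders'], packages,
                          ['my_project', 'staging', 'users'])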
|
||||
|
||||
|
||||
def warn_if_useless_spec(spec, nodes):
|
||||
if len(nodes) > 0:
|
||||
return
|
||||
|
||||
msg = (
|
||||
"* Spec='{}' does not identify any models"
|
||||
.format(spec['raw'])
|
||||
)
|
||||
dbt.exceptions.warn_or_error(msg, log_fmt='{} and was ignored\n')
|
||||
|
||||
|
||||
class NodeSelector(object):
|
||||
def __init__(self, linker, manifest):
|
||||
self.linker = linker
|
||||
self.manifest = manifest
|
||||
|
||||
def _node_iterator(self, graph, exclude, include):
|
||||
for node in graph.nodes():
|
||||
real_node = self.manifest.nodes[node]
|
||||
if include is not None and real_node.resource_type not in include:
|
||||
continue
|
||||
if exclude is not None and real_node.resource_type in exclude:
|
||||
continue
|
||||
yield node, real_node
|
||||
|
||||
def parsed_nodes(self, graph):
|
||||
return self._node_iterator(
|
||||
graph,
|
||||
exclude=(NodeType.Source,),
|
||||
include=None)
|
||||
|
||||
def source_nodes(self, graph):
|
||||
return self._node_iterator(
|
||||
graph,
|
||||
exclude=None,
|
||||
include=(NodeType.Source,))
|
||||
|
||||
def get_nodes_by_qualified_name(self, graph, qualified_name_selector):
|
||||
"""Yield all nodes in the graph that match the qualified_name_selector.
|
||||
|
||||
:param str qualified_name_selector: The selector or node name
|
||||
"""
|
||||
qualified_name = qualified_name_selector.split(".")
|
||||
package_names = get_package_names(graph)
|
||||
for node, real_node in self.parsed_nodes(graph):
|
||||
if _node_is_match(qualified_name, package_names, real_node.fqn):
|
||||
yield node
|
||||
|
||||
def get_nodes_by_tag(self, graph, tag_name):
|
||||
""" yields nodes from graph that have the specified tag """
|
||||
for node, real_node in self.parsed_nodes(graph):
|
||||
if tag_name in real_node.tags:
|
||||
yield node
|
||||
|
||||
def get_nodes_by_source(self, graph, source_full_name):
|
||||
"""yields nodes from graph are the specified source."""
|
||||
parts = source_full_name.split('.')
|
||||
target_package = SELECTOR_GLOB
|
||||
if len(parts) == 1:
|
||||
target_source, target_table = parts[0], None
|
||||
elif len(parts) == 2:
|
||||
target_source, target_table = parts
|
||||
elif len(parts) == 3:
|
||||
target_package, target_source, target_table = parts
|
||||
else: # len(parts) > 3 or len(parts) == 0
|
||||
msg = (
|
||||
'Invalid source selector value "{}". Sources must be of the '
|
||||
'form `${{source_name}}`, '
|
||||
'`${{source_name}}.${{target_name}}`, or '
|
||||
'`${{package_name}}.${{source_name}}.${{target_name}}`'
|
||||
).format(source_full_name)
|
||||
raise dbt.exceptions.RuntimeException(msg)
|
||||
|
||||
for node, real_node in self.source_nodes(graph):
|
||||
if target_package not in (real_node.package_name, SELECTOR_GLOB):
|
||||
continue
|
||||
if target_source not in (real_node.source_name, SELECTOR_GLOB):
|
||||
continue
|
||||
if target_table in (None, real_node.name, SELECTOR_GLOB):
|
||||
yield node
|
||||
|
||||
def select_childrens_parents(self, graph, selected):
|
||||
ancestors_for = self.select_children(graph, selected) | selected
|
||||
return self.select_parents(graph, ancestors_for) | ancestors_for
|
||||
|
||||
def select_children(self, graph, selected):
|
||||
descendants = set()
|
||||
for node in selected:
|
||||
descendants.update(nx.descendants(graph, node))
|
||||
return descendants
|
||||
|
||||
def select_parents(self, graph, selected):
|
||||
ancestors = set()
|
||||
for node in selected:
|
||||
ancestors.update(nx.ancestors(graph, node))
|
||||
return ancestors
|
||||
|
||||
def collect_models(self, graph, selected, spec):
|
||||
additional = set()
|
||||
if spec.select_childrens_parents:
|
||||
additional.update(self.select_childrens_parents(graph, selected))
|
||||
if spec.select_parents:
|
||||
additional.update(self.select_parents(graph, selected))
|
||||
if spec.select_children:
|
||||
additional.update(self.select_children(graph, selected))
|
||||
return additional
|
||||
|
||||
def collect_tests(self, graph, model_nodes):
|
||||
test_nodes = set()
|
||||
for node in model_nodes:
|
||||
# include tests that depend on this node. if we aren't running
|
||||
# tests, they'll be filtered out later.
|
||||
child_tests = [n for n in graph.successors(node)
|
||||
if self.manifest.nodes[n].resource_type ==
|
||||
NodeType.Test]
|
||||
test_nodes.update(child_tests)
|
||||
return test_nodes
|
||||
|
||||
def get_nodes_from_spec(self, graph, spec):
|
||||
filter_map = {
|
||||
SELECTOR_FILTERS.FQN: self.get_nodes_by_qualified_name,
|
||||
SELECTOR_FILTERS.TAG: self.get_nodes_by_tag,
|
||||
SELECTOR_FILTERS.SOURCE: self.get_nodes_by_source,
|
||||
}
|
||||
|
||||
filter_method = filter_map.get(spec.selector_type)
|
||||
|
||||
if filter_method is None:
|
||||
valid_selectors = ", ".join(filter_map.keys())
|
||||
logger.info("The '{}' selector specified in {} is invalid. Must "
|
||||
"be one of [{}]".format(
|
||||
spec.selector_type,
|
||||
spec.raw,
|
||||
valid_selectors))
|
||||
|
||||
return set()
|
||||
|
||||
collected = set(filter_method(graph, spec.selector_value))
|
||||
collected.update(self.collect_models(graph, collected, spec))
|
||||
collected.update(self.collect_tests(graph, collected))
|
||||
|
||||
return collected
|
||||
|
||||
def select_nodes(self, graph, raw_include_specs, raw_exclude_specs):
|
||||
selected_nodes = set()
|
||||
|
||||
for raw_spec in split_specs(raw_include_specs):
|
||||
spec = SelectionCriteria(raw_spec)
|
||||
included_nodes = self.get_nodes_from_spec(graph, spec)
|
||||
selected_nodes.update(included_nodes)
|
||||
|
||||
for raw_spec in split_specs(raw_exclude_specs):
|
||||
spec = SelectionCriteria(raw_spec)
|
||||
excluded_nodes = self.get_nodes_from_spec(graph, spec)
|
||||
selected_nodes.difference_update(excluded_nodes)
|
||||
|
||||
return selected_nodes
|
||||
|
||||
def _is_graph_member(self, node_name):
|
||||
node = self.manifest.nodes[node_name]
|
||||
if node.resource_type == NodeType.Source:
|
||||
return True
|
||||
return not node.get('empty') and is_enabled(node)
|
||||
|
||||
def get_valid_nodes(self, graph):
|
||||
return [
|
||||
node_name for node_name in graph.nodes()
|
||||
if self._is_graph_member(node_name)
|
||||
]
|
||||
|
||||
def _is_match(self, node_name, resource_types, tags, required):
|
||||
node = self.manifest.nodes[node_name]
|
||||
if node.resource_type not in resource_types:
|
||||
return False
|
||||
tags = set(tags)
|
||||
if tags and not bool(set(node.tags) & tags):
|
||||
# there are tags specified but none match
|
||||
return False
|
||||
for attr in required:
|
||||
if not getattr(node, attr):
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_selected(self, include, exclude, resource_types, tags, required):
|
||||
graph = self.linker.graph
|
||||
|
||||
include = coalesce(include, ['fqn:*', 'source:*'])
|
||||
exclude = coalesce(exclude, [])
|
||||
tags = coalesce(tags, [])
|
||||
|
||||
to_run = self.get_valid_nodes(graph)
|
||||
filtered_graph = graph.subgraph(to_run)
|
||||
selected_nodes = self.select_nodes(filtered_graph, include, exclude)
|
||||
|
||||
filtered_nodes = set()
|
||||
for node_name in selected_nodes:
|
||||
if self._is_match(node_name, resource_types, tags, required):
|
||||
filtered_nodes.add(node_name)
|
||||
|
||||
return filtered_nodes
|
||||
|
||||
def select(self, query):
|
||||
include = query.get('include')
|
||||
exclude = query.get('exclude')
|
||||
resource_types = query.get('resource_types')
|
||||
tags = query.get('tags')
|
||||
required = query.get('required', ())
|
||||
addin_ephemeral_nodes = query.get('addin_ephemeral_nodes', True)
|
||||
|
||||
selected = self.get_selected(include, exclude, resource_types, tags,
|
||||
required)
|
||||
|
||||
# if you haven't selected any nodes, return that so we can give the
|
||||
# nice "no models selected" message.
|
||||
if not selected:
|
||||
return selected
|
||||
|
||||
# we used to carefully go through all node ancestors and add those if
|
||||
# they were ephemeral. Sadly, the algorithm we used ended up being
|
||||
# O(n^2). Instead, since ephemeral nodes are almost free, just add all
|
||||
# ephemeral nodes in the graph.
|
||||
# someday at large enough scale we might want to prune it to only be
|
||||
# ancestors of the selected nodes so we can skip the compile.
|
||||
if addin_ephemeral_nodes:
|
||||
addins = {
|
||||
uid for uid, node in self.manifest.nodes.items()
|
||||
if node.is_ephemeral_model
|
||||
}
|
||||
else:
|
||||
addins = set()
|
||||
|
||||
return selected | addins
|
||||
@@ -12,7 +12,7 @@ class ModelHookType:
|
||||
def _parse_hook_to_dict(hook_string):
|
||||
try:
|
||||
hook_dict = json.loads(hook_string)
|
||||
except ValueError as e:
|
||||
except ValueError:
|
||||
hook_dict = {"sql": hook_string}
|
||||
|
||||
if 'transaction' not in hook_dict:
|
||||
1 core/dbt/include/__init__.py Normal file
@@ -0,0 +1 @@
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
11 core/dbt/include/global_project/__init__.py Normal file
@@ -0,0 +1,11 @@
import os

PACKAGE_PATH = os.path.dirname(__file__)
PROJECT_NAME = 'dbt'

DOCS_INDEX_FILE_PATH = os.path.normpath(
    os.path.join(PACKAGE_PATH, '..', "index.html"))


# Adapter registration will add to this
PACKAGES = {PROJECT_NAME: PACKAGE_PATH}
@@ -27,7 +27,7 @@ button at the top-right of this lineage pane, you'll be able to see all of the m
|
||||
or are built from, the model you're exploring.
|
||||
|
||||
Once expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the
|
||||
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/reference#section-specifying-models-to-run).
|
||||
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).
|
||||
|
||||
Note that you can also right-click on models to interactively filter and explore the graph.
|
||||
|
||||
269 core/dbt/include/global_project/macros/adapters/common.sql Normal file
@@ -0,0 +1,269 @@
|
||||
{% macro adapter_macro(name) -%}
|
||||
{% set original_name = name %}
|
||||
{% if '.' in name %}
|
||||
{% set package_name, name = name.split(".", 1) %}
|
||||
{% else %}
|
||||
{% set package_name = none %}
|
||||
{% endif %}
|
||||
|
||||
{% if package_name is none %}
|
||||
{% set package_context = context %}
|
||||
{% elif package_name in context %}
|
||||
{% set package_context = context[package_name] %}
|
||||
{% else %}
|
||||
{% set error_msg %}
|
||||
In adapter_macro: could not find package '{{package_name}}', called with '{{original_name}}'
|
||||
{% endset %}
|
||||
{{ exceptions.raise_compiler_error(error_msg | trim) }}
|
||||
{% endif %}
|
||||
|
||||
{%- set separator = '__' -%}
|
||||
{%- set search_name = adapter.type() + separator + name -%}
|
||||
{%- set default_name = 'default' + separator + name -%}
|
||||
|
||||
{%- if package_context.get(search_name) is not none -%}
|
||||
{{ return(package_context[search_name](*varargs, **kwargs)) }}
|
||||
{%- else -%}
|
||||
{{ return(package_context[default_name](*varargs, **kwargs)) }}
|
||||
{%- endif -%}
|
||||
{%- endmacro %}
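The dispatch above resolves `<adapter>__<name>` before falling back to `default__<name>`. A rough Python rendering of just that name-resolution rule, not dbt's actual implementation:

# not dbt's implementation, just the naming rule used by adapter_macro
def resolve(adapter_type, name, available_macros):
    candidate = '{}__{}'.format(adapter_type, name)
    default = 'default__{}'.format(name)
    return candidate if candidate in available_macros else default

macros = {'default__create_table_as', 'postgres__create_table_as'}
assert resolve('postgres', 'create_table_as', macros) == 'postgres__create_table_as'
assert resolve('bigquery', 'create_table_as', macros) == 'default__create_table_as'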
|
||||
|
||||
{% macro get_columns_in_query(select_sql) -%}
|
||||
{{ return(adapter_macro('get_columns_in_query', select_sql)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__get_columns_in_query(select_sql) %}
|
||||
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
|
||||
select * from (
|
||||
{{ select_sql }}
|
||||
) as __dbt_sbq
|
||||
where false
|
||||
limit 0
|
||||
{% endcall %}
|
||||
|
||||
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro create_schema(database_name, schema_name) -%}
|
||||
{{ adapter_macro('create_schema', database_name, schema_name) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__create_schema(database_name, schema_name) -%}
|
||||
{%- call statement('create_schema') -%}
|
||||
create schema if not exists {{database_name}}.{{schema_name}}
|
||||
{% endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro drop_schema(database_name, schema_name) -%}
|
||||
{{ adapter_macro('drop_schema', database_name, schema_name) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__drop_schema(database_name, schema_name) -%}
|
||||
{%- call statement('drop_schema') -%}
|
||||
drop schema if exists {{database_name}}.{{schema_name}} cascade
|
||||
{% endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro create_table_as(temporary, relation, sql) -%}
|
||||
{{ adapter_macro('create_table_as', temporary, relation, sql) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__create_table_as(temporary, relation, sql) -%}
|
||||
create {% if temporary: -%}temporary{%- endif %} table
|
||||
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
|
||||
as (
|
||||
{{ sql }}
|
||||
);
|
||||
{% endmacro %}
|
||||
|
||||
{% macro create_view_as(relation, sql) -%}
|
||||
{{ adapter_macro('create_view_as', relation, sql) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__create_view_as(relation, sql) -%}
|
||||
create view {{ relation }} as (
|
||||
{{ sql }}
|
||||
);
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro get_catalog(information_schemas) -%}
|
||||
{{ return(adapter_macro('get_catalog', information_schemas)) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__get_catalog(information_schemas) -%}
|
||||
|
||||
{% set typename = adapter.type() %}
|
||||
{% set msg -%}
|
||||
get_catalog not implemented for {{ typename }}
|
||||
{%- endset %}
|
||||
|
||||
{{ exceptions.raise_compiler_error(msg) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro get_columns_in_relation(relation) -%}
|
||||
{{ return(adapter_macro('get_columns_in_relation', relation)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro sql_convert_columns_in_relation(table) -%}
|
||||
{% set columns = [] %}
|
||||
{% for row in table %}
|
||||
{% do columns.append(api.Column(*row)) %}
|
||||
{% endfor %}
|
||||
{{ return(columns) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__get_columns_in_relation(relation) -%}
|
||||
{{ exceptions.raise_not_implemented(
|
||||
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro alter_column_type(relation, column_name, new_column_type) -%}
|
||||
{{ return(adapter_macro('alter_column_type', relation, column_name, new_column_type)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
|
||||
{#
|
||||
1. Create a new column (w/ temp name and correct type)
|
||||
2. Copy data over to it
|
||||
3. Drop the existing column (cascade!)
|
||||
4. Rename the new column to existing column
|
||||
#}
|
||||
{%- set tmp_column = column_name + "__dbt_alter" -%}
|
||||
|
||||
{% call statement('alter_column_type') %}
|
||||
alter table {{ relation }} add column {{ tmp_column }} {{ new_column_type }};
|
||||
update {{ relation }} set {{ tmp_column }} = {{ column_name }};
|
||||
alter table {{ relation }} drop column {{ column_name }} cascade;
|
||||
alter table {{ relation }} rename column {{ tmp_column }} to {{ column_name }}
|
||||
{% endcall %}
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro drop_relation(relation) -%}
|
||||
{{ return(adapter_macro('drop_relation', relation)) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__drop_relation(relation) -%}
|
||||
{% call statement('drop_relation', auto_begin=False) -%}
|
||||
drop {{ relation.type }} if exists {{ relation }} cascade
|
||||
{%- endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro truncate_relation(relation) -%}
|
||||
{{ return(adapter_macro('truncate_relation', relation)) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__truncate_relation(relation) -%}
|
||||
{% call statement('truncate_relation') -%}
|
||||
truncate table {{ relation }}
|
||||
{%- endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro rename_relation(from_relation, to_relation) -%}
|
||||
{{ return(adapter_macro('rename_relation', from_relation, to_relation)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__rename_relation(from_relation, to_relation) -%}
|
||||
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
|
||||
{% call statement('rename_relation') -%}
|
||||
alter table {{ from_relation }} rename to {{ target_name }}
|
||||
{%- endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro information_schema_name(database) %}
|
||||
{{ return(adapter_macro('information_schema_name', database)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__information_schema_name(database) -%}
|
||||
{%- if database -%}
|
||||
{{ adapter.quote_as_configured(database, 'database') }}.information_schema
|
||||
{%- else -%}
|
||||
information_schema
|
||||
{%- endif -%}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro list_schemas(database) -%}
|
||||
{{ return(adapter_macro('list_schemas', database)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__list_schemas(database) -%}
|
||||
{% call statement('list_schemas', fetch_result=True, auto_begin=False) %}
|
||||
select distinct schema_name
|
||||
from {{ information_schema_name(database) }}.schemata
|
||||
where catalog_name ilike '{{ database }}'
|
||||
{% endcall %}
|
||||
{{ return(load_result('list_schemas').table) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro check_schema_exists(information_schema, schema) -%}
|
||||
{{ return(adapter_macro('check_schema_exists', information_schema, schema)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__check_schema_exists(information_schema, schema) -%}
|
||||
{% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%}
|
||||
select count(*)
|
||||
from {{ information_schema }}.schemata
|
||||
where catalog_name='{{ information_schema.database }}'
|
||||
and schema_name='{{ schema }}'
|
||||
{%- endcall %}
|
||||
{{ return(load_result('check_schema_exists').table) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro list_relations_without_caching(information_schema, schema) %}
|
||||
{{ return(adapter_macro('list_relations_without_caching', information_schema, schema)) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__list_relations_without_caching(information_schema, schema) %}
|
||||
{{ exceptions.raise_not_implemented(
|
||||
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro current_timestamp() -%}
|
||||
{{ adapter_macro('current_timestamp') }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro default__current_timestamp() -%}
|
||||
{{ exceptions.raise_not_implemented(
|
||||
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro collect_freshness(source, loaded_at_field) %}
|
||||
{{ return(adapter_macro('collect_freshness', source, loaded_at_field))}}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__collect_freshness(source, loaded_at_field) %}
|
||||
{% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%}
|
||||
select
|
||||
max({{ loaded_at_field }}) as max_loaded_at,
|
||||
{{ current_timestamp() }} as snapshotted_at
|
||||
from {{ source }}
|
||||
{% endcall %}
|
||||
{{ return(load_result('check_schema_exists').table) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
|
||||
{{ return(adapter_macro('make_temp_relation', base_relation, suffix))}}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__make_temp_relation(base_relation, suffix) %}
|
||||
{% set tmp_identifier = base_relation.identifier ~ suffix %}
|
||||
{% set tmp_relation = base_relation.incorporate(
|
||||
path={"identifier": tmp_identifier},
|
||||
table_name=tmp_identifier) -%}
|
||||
|
||||
{% do return(tmp_relation) %}
|
||||
{% endmacro %}
|
||||
@@ -48,9 +48,13 @@
|
||||
{% set start_date = partition_range[0] %}
|
||||
{% set end_date = partition_range[1] %}
|
||||
{% else %}
|
||||
{{ dbt.exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }}
|
||||
{{ exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }}
|
||||
{% endif %}
|
||||
|
||||
{{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro py_current_timestring() %}
|
||||
{% set dt = modules.datetime.datetime.now() %}
|
||||
{% do return(dt.strftime("%Y%m%d%H%M%S%f")) %}
|
||||
{% endmacro %}
|
||||
@@ -0,0 +1,27 @@
|
||||
|
||||
{#
|
||||
Renders an alias name given a custom alias name. If the custom
|
||||
alias name is none, then the resulting alias is just the filename of the
|
||||
model. If an alias override is specified, then that is used.
|
||||
|
||||
This macro can be overridden in projects to define different semantics
|
||||
for rendering an alias name.
|
||||
|
||||
Arguments:
|
||||
custom_alias_name: The custom alias name specified for a model, or none
|
||||
node: The available node that an alias is being generated for, or none
|
||||
|
||||
#}
|
||||
{% macro generate_alias_name(custom_alias_name=none, node=none) -%}
|
||||
|
||||
{%- if custom_alias_name is none -%}
|
||||
|
||||
{{ node.name }}
|
||||
|
||||
{%- else -%}
|
||||
|
||||
{{ custom_alias_name | trim }}
|
||||
|
||||
{%- endif -%}
|
||||
|
||||
{%- endmacro %}
|
||||
@@ -3,7 +3,7 @@
|
||||
Renders a schema name given a custom schema name. If the custom
|
||||
schema name is none, then the resulting schema is just the "schema"
|
||||
value in the specified target. If a schema override is specified, then
|
||||
the resulting schema is the default schema concatenated with the
|
||||
the resulting schema is the default schema concatenated with the
|
||||
custom schema.
|
||||
|
||||
This macro can be overridden in projects to define different semantics
|
||||
@@ -11,9 +11,10 @@
|
||||
|
||||
Arguments:
|
||||
custom_schema_name: The custom schema name specified for a model, or none
|
||||
node: The node the schema is being generated for
|
||||
|
||||
#}
|
||||
{% macro generate_schema_name(custom_schema_name=none) -%}
|
||||
{% macro generate_schema_name(custom_schema_name, node) -%}
|
||||
|
||||
{%- set default_schema = target.schema -%}
|
||||
{%- if custom_schema_name is none -%}
|
||||
@@ -36,9 +37,10 @@
|
||||
|
||||
Arguments:
|
||||
custom_schema_name: The custom schema name specified for a model, or none
|
||||
node: The node the schema is being generated for
|
||||
|
||||
#}
|
||||
{% macro generate_schema_name_for_env(custom_schema_name=none) -%}
|
||||
{% macro generate_schema_name_for_env(custom_schema_name, node) -%}
|
||||
|
||||
{%- set default_schema = target.schema -%}
|
||||
{%- if target.name == 'prod' and custom_schema_name is not none -%}
|
||||
@@ -0,0 +1,18 @@
|
||||
{% macro table_options() %}
|
||||
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
|
||||
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro get_relation_comment(persist_docs, model) %}
|
||||
|
||||
{%- if persist_docs is not mapping -%}
|
||||
{{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }}
|
||||
{% endif %}
|
||||
|
||||
{% if persist_docs.get('relation', false) %}
|
||||
{{ return((model.description | tojson)[1:-1]) }}
|
||||
{%- else -%}
|
||||
{{ return(none) }}
|
||||
{% endif %}
|
||||
|
||||
{% endmacro %}
|
||||
@@ -0,0 +1,13 @@
{% macro is_incremental() %}
  {#-- do not run introspective queries in parsing #}
  {% if not execute %}
    {{ return(False) }}
  {% else %}
    {% set relation = adapter.get_relation(this.database, this.schema, this.table) %}
    {{ return(relation is not none
              and relation.type == 'table'
              and model.config.materialized == 'incremental'
              and not flags.FULL_REFRESH) }}
  {% endif %}
{% endmacro %}
8 core/dbt/include/global_project/macros/etc/query.sql Normal file
@@ -0,0 +1,8 @@
{% macro run_query(sql) %}
  {% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
    {{ sql }}
  {% endcall %}

  {% do return(load_result("run_query_statement").table) %}
{% endmacro %}
@@ -0,0 +1,71 @@
|
||||
|
||||
|
||||
{% macro get_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{{ adapter_macro('get_merge_sql', target, source, unique_key, dest_columns) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{{ adapter_macro('get_delete_insert_merge_sql', target, source, unique_key, dest_columns) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro common_get_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{%- set dest_cols_csv = dest_columns | map(attribute="name") | join(', ') -%}
|
||||
|
||||
merge into {{ target }} as DBT_INTERNAL_DEST
|
||||
using {{ source }} as DBT_INTERNAL_SOURCE
|
||||
|
||||
{% if unique_key %}
|
||||
on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
|
||||
{% else %}
|
||||
on FALSE
|
||||
{% endif %}
|
||||
|
||||
{% if unique_key %}
|
||||
when matched then update set
|
||||
{% for column in dest_columns -%}
|
||||
{{ column.name }} = DBT_INTERNAL_SOURCE.{{ column.name }}
|
||||
{%- if not loop.last %}, {%- endif %}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
|
||||
when not matched then insert
|
||||
({{ dest_cols_csv }})
|
||||
values
|
||||
({{ dest_cols_csv }})
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__get_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{% set typename = adapter.type() %}
|
||||
|
||||
{{ exceptions.raise_compiler_error(
|
||||
'get_merge_sql is not implemented for {}'.format(typename)
|
||||
)
|
||||
}}
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{%- set dest_cols_csv = dest_columns | map(attribute="name") | join(', ') -%}
|
||||
|
||||
{% if unique_key is not none %}
|
||||
delete from {{ target }}
|
||||
where ({{ unique_key }}) in (
|
||||
select ({{ unique_key }})
|
||||
from {{ source }}
|
||||
);
|
||||
{% endif %}
|
||||
|
||||
insert into {{ target }} ({{ dest_cols_csv }})
|
||||
(
|
||||
select {{ dest_cols_csv }}
|
||||
from {{ source }}
|
||||
);
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
|
||||
{{ common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) }}
|
||||
{% endmacro %}
|
||||
@@ -14,14 +14,14 @@
|
||||
|
||||
{% macro column_list(columns) %}
|
||||
{%- for col in columns %}
|
||||
{{ adapter.quote(col.name) }} {% if not loop.last %},{% endif %}
|
||||
{{ col.name }} {% if not loop.last %},{% endif %}
|
||||
{% endfor -%}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro column_list_for_create_table(columns) %}
|
||||
{%- for col in columns %}
|
||||
{{ adapter.quote(col.name) }} {{ col.data_type }} {%- if not loop.last %},{% endif %}
|
||||
{{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %}
|
||||
{% endfor -%}
|
||||
{% endmacro %}
|
||||
|
||||
@@ -6,38 +6,29 @@
|
||||
from {{ target_relation }}
|
||||
where ({{ unique_key }}) in (
|
||||
select ({{ unique_key }})
|
||||
from {{ tmp_relation.include(schema=False) }}
|
||||
from {{ tmp_relation.include(schema=False, database=False) }}
|
||||
);
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
{% materialization incremental, default -%}
|
||||
{%- set sql_where = config.require('sql_where') -%}
|
||||
{%- set unique_key = config.get('unique_key') -%}
|
||||
|
||||
{%- set identifier = model['alias'] -%}
|
||||
{%- set tmp_identifier = identifier + '__dbt_incremental_tmp' -%}
|
||||
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
|
||||
{%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%}
|
||||
{%- set tmp_relation = make_temp_relation(target_relation) %}
|
||||
|
||||
{%- set old_relation = adapter.get_relation(schema=schema, identifier=identifier) -%}
|
||||
{%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, type='table') -%}
|
||||
{%- set tmp_relation = api.Relation.create(identifier=tmp_identifier,
|
||||
schema=schema, type='table') -%}
|
||||
|
||||
{%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%}
|
||||
{%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%}
|
||||
|
||||
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
|
||||
{%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%}
|
||||
|
||||
{%- set should_truncate = (non_destructive_mode and full_refresh_mode and exists_as_table) -%}
|
||||
{%- set should_drop = (not should_truncate and (full_refresh_mode or exists_not_as_table)) -%}
|
||||
{%- set force_create = (flags.FULL_REFRESH and not flags.NON_DESTRUCTIVE) -%}
|
||||
{%- set should_drop = (full_refresh_mode or exists_not_as_table) -%}
|
||||
|
||||
-- setup
|
||||
{% if old_relation is none -%}
|
||||
-- noop
|
||||
{%- elif should_truncate -%}
|
||||
{{ adapter.truncate_relation(old_relation) }}
|
||||
{%- elif should_drop -%}
|
||||
{{ adapter.drop_relation(old_relation) }}
|
||||
{%- set old_relation = none -%}
|
||||
@@ -49,32 +40,22 @@
|
||||
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
||||
|
||||
-- build model
|
||||
{% if force_create or old_relation is none -%}
|
||||
{% if full_refresh_mode or old_relation is none -%}
|
||||
{%- call statement('main') -%}
|
||||
{{ create_table_as(False, target_relation, sql) }}
|
||||
{%- endcall -%}
|
||||
{%- else -%}
|
||||
{%- call statement() -%}
|
||||
|
||||
{% set tmp_table_sql -%}
|
||||
{# We are using a subselect instead of a CTE here to allow PostgreSQL to use indexes. -#}
|
||||
select * from (
|
||||
{{ sql }}
|
||||
) as dbt_incr_sbq
|
||||
where ({{ sql_where }})
|
||||
or ({{ sql_where }}) is null
|
||||
{%- endset %}
|
||||
|
||||
{{ dbt.create_table_as(True, tmp_relation, tmp_table_sql) }}
|
||||
{{ dbt.create_table_as(True, tmp_relation, sql) }}
|
||||
|
||||
{%- endcall -%}
|
||||
|
||||
{{ adapter.expand_target_column_types(temp_table=tmp_identifier,
|
||||
to_schema=schema,
|
||||
to_table=identifier) }}
|
||||
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
|
||||
to_relation=target_relation) }}
|
||||
|
||||
{%- call statement('main') -%}
|
||||
{% set dest_columns = adapter.get_columns_in_table(schema, identifier) %}
|
||||
{% set dest_columns = adapter.get_columns_in_relation(target_relation) %}
|
||||
{% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %}
|
||||
|
||||
{% if unique_key is not none -%}
|
||||
@@ -86,7 +67,7 @@
|
||||
insert into {{ target_relation }} ({{ dest_cols_csv }})
|
||||
(
|
||||
select {{ dest_cols_csv }}
|
||||
from {{ tmp_relation.include(schema=False) }}
|
||||
from {{ tmp_relation }}
|
||||
);
|
||||
{% endcall %}
|
||||
{%- endif %}
|
||||
@@ -47,14 +47,14 @@
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__load_csv_rows(model) %}
|
||||
{% macro basic_load_csv_rows(model, batch_size) %}
|
||||
{% set agate_table = model['agate_table'] %}
|
||||
{% set cols_sql = ", ".join(agate_table.column_names) %}
|
||||
{% set bindings = [] %}
|
||||
|
||||
{% set statements = [] %}
|
||||
|
||||
{% for chunk in agate_table.rows | batch(10000) %}
|
||||
{% for chunk in agate_table.rows | batch(batch_size) %}
|
||||
{% set bindings = [] %}
|
||||
|
||||
{% for row in chunk %}
|
||||
@@ -84,12 +84,17 @@
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro default__load_csv_rows(model) %}
|
||||
{{ return(basic_load_csv_rows(model, 10000) )}}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% materialization seed, default %}
|
||||
|
||||
{%- set identifier = model['alias'] -%}
|
||||
{%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%}
|
||||
|
||||
{%- set old_relation = adapter.get_relation(schema=schema, identifier=identifier) -%}
|
||||
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
|
||||
|
||||
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
|
||||
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
|
||||
@@ -0,0 +1,262 @@
|
||||
{#
|
||||
Add new columns to the table if applicable
|
||||
#}
|
||||
{% macro create_columns(relation, columns) %}
|
||||
{{ adapter_macro('create_columns', relation, columns) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__create_columns(relation, columns) %}
|
||||
{% for column in columns %}
|
||||
{% call statement() %}
|
||||
alter table {{ relation }} add column "{{ column.name }}" {{ column.data_type }};
|
||||
{% endcall %}
|
||||
{% endfor %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro post_snapshot(staging_relation) %}
|
||||
{{ adapter_macro('post_snapshot', staging_relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__post_snapshot(staging_relation) %}
|
||||
{# no-op #}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%}
|
||||
|
||||
with snapshot_query as (
|
||||
|
||||
{{ source_sql }}
|
||||
|
||||
),
|
||||
|
||||
snapshotted_data as (
|
||||
|
||||
select *,
|
||||
{{ strategy.unique_key }} as dbt_unique_key
|
||||
|
||||
from {{ target_relation }}
|
||||
|
||||
),
|
||||
|
||||
source_data as (
|
||||
|
||||
select *,
|
||||
{{ strategy.scd_id }} as dbt_scd_id,
|
||||
{{ strategy.unique_key }} as dbt_unique_key,
|
||||
{{ strategy.updated_at }} as dbt_updated_at,
|
||||
{{ strategy.updated_at }} as dbt_valid_from,
|
||||
nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to
|
||||
|
||||
from snapshot_query
|
||||
),
|
||||
|
||||
insertions as (
|
||||
|
||||
select
|
||||
'insert' as dbt_change_type,
|
||||
source_data.*
|
||||
|
||||
from source_data
|
||||
left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key
|
||||
where snapshotted_data.dbt_unique_key is null
|
||||
or (
|
||||
snapshotted_data.dbt_unique_key is not null
|
||||
and snapshotted_data.dbt_valid_to is null
|
||||
and (
|
||||
{{ strategy.row_changed }}
|
||||
)
|
||||
)
|
||||
|
||||
)
|
||||
|
||||
select * from insertions
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro snapshot_staging_table_updates(strategy, source_sql, target_relation) -%}
|
||||
|
||||
with snapshot_query as (
|
||||
|
||||
{{ source_sql }}
|
||||
|
||||
),
|
||||
|
||||
snapshotted_data as (
|
||||
|
||||
select *,
|
||||
{{ strategy.unique_key }} as dbt_unique_key
|
||||
|
||||
from {{ target_relation }}
|
||||
|
||||
),
|
||||
|
||||
source_data as (
|
||||
|
||||
select
|
||||
*,
|
||||
{{ strategy.scd_id }} as dbt_scd_id,
|
||||
{{ strategy.unique_key }} as dbt_unique_key,
|
||||
{{ strategy.updated_at }} as dbt_updated_at,
|
||||
{{ strategy.updated_at }} as dbt_valid_from
|
||||
|
||||
from snapshot_query
|
||||
),
|
||||
|
||||
updates as (
|
||||
|
||||
select
|
||||
'update' as dbt_change_type,
|
||||
snapshotted_data.dbt_scd_id,
|
||||
source_data.dbt_valid_from as dbt_valid_to
|
||||
|
||||
from source_data
|
||||
join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key
|
||||
where snapshotted_data.dbt_valid_to is null
|
||||
and (
|
||||
{{ strategy.row_changed }}
|
||||
)
|
||||
|
||||
)
|
||||
|
||||
select * from updates
|
||||
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro build_snapshot_table(strategy, sql) %}
|
||||
|
||||
select *,
|
||||
{{ strategy.scd_id }} as dbt_scd_id,
|
||||
{{ strategy.updated_at }} as dbt_updated_at,
|
||||
{{ strategy.updated_at }} as dbt_valid_from,
|
||||
nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to
|
||||
from (
|
||||
{{ sql }}
|
||||
) sbq
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro get_or_create_relation(database, schema, identifier, type) %}
|
||||
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
|
||||
|
||||
{% if target_relation %}
|
||||
{% do return([true, target_relation]) %}
|
||||
{% endif %}
|
||||
|
||||
{%- set new_relation = api.Relation.create(
|
||||
database=database,
|
||||
schema=schema,
|
||||
identifier=identifier,
|
||||
type=type
|
||||
) -%}
|
||||
{% do return([false, new_relation]) %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}
|
||||
{% set tmp_relation = make_temp_relation(target_relation) %}
|
||||
|
||||
{% set inserts_select = snapshot_staging_table_inserts(strategy, sql, target_relation) %}
|
||||
{% set updates_select = snapshot_staging_table_updates(strategy, sql, target_relation) %}
|
||||
|
||||
{% call statement('build_snapshot_staging_relation_inserts') %}
|
||||
{{ create_table_as(True, tmp_relation, inserts_select) }}
|
||||
{% endcall %}
|
||||
|
||||
{% call statement('build_snapshot_staging_relation_updates') %}
|
||||
insert into {{ tmp_relation }} (dbt_change_type, dbt_scd_id, dbt_valid_to)
|
||||
select dbt_change_type, dbt_scd_id, dbt_valid_to from (
|
||||
{{ updates_select }}
|
||||
) dbt_sbq;
|
||||
{% endcall %}
|
||||
|
||||
{% do return(tmp_relation) %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% materialization snapshot, default %}
|
||||
{%- set config = model['config'] -%}
|
||||
|
||||
{%- set target_database = config.get('target_database') -%}
|
||||
{%- set target_schema = config.get('target_schema') -%}
|
||||
{%- set target_table = model.get('alias', model.get('name')) -%}
|
||||
|
||||
{%- set strategy_name = config.get('strategy') -%}
|
||||
{%- set unique_key = config.get('unique_key') %}
|
||||
|
||||
{% if not adapter.check_schema_exists(target_database, target_schema) %}
|
||||
{% do create_schema(target_database, target_schema) %}
|
||||
{% endif %}
|
||||
|
||||
{% set target_relation_exists, target_relation = get_or_create_relation(
|
||||
database=target_database,
|
||||
schema=target_schema,
|
||||
identifier=target_table,
|
||||
type='table') -%}
|
||||
|
||||
{%- if not target_relation.is_table -%}
|
||||
{% do exceptions.relation_wrong_type(target_relation, 'table') %}
|
||||
{%- endif -%}
|
||||
|
||||
{% set strategy_macro = strategy_dispatch(strategy_name) %}
|
||||
{% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %}
|
||||
|
||||
{% if not target_relation_exists %}
|
||||
|
||||
{% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %}
|
||||
{% call statement('main') -%}
|
||||
{{ create_table_as(False, target_relation, build_sql) }}
|
||||
{% endcall %}
|
||||
|
||||
{% else %}
|
||||
|
||||
{{ adapter.valid_snapshot_target(target_relation) }}
|
||||
|
||||
{% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}
|
||||
|
||||
-- this may no-op if the database does not require column expansion
|
||||
{% do adapter.expand_target_column_types(from_relation=staging_table,
|
||||
to_relation=target_relation) %}
|
||||
|
||||
{% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)
|
||||
| rejectattr('name', 'equalto', 'dbt_change_type')
|
||||
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
|
||||
| rejectattr('name', 'equalto', 'dbt_unique_key')
|
||||
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
|
||||
| list %}
|
||||
|
||||
{% do create_columns(target_relation, missing_columns) %}
|
||||
|
||||
{% set source_columns = adapter.get_columns_in_relation(staging_table)
|
||||
| rejectattr('name', 'equalto', 'dbt_change_type')
|
||||
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
|
||||
| rejectattr('name', 'equalto', 'dbt_unique_key')
|
||||
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
|
||||
| list %}
|
||||
|
||||
{% set quoted_source_columns = [] %}
|
||||
{% for column in source_columns %}
|
||||
{% do quoted_source_columns.append(adapter.quote(column.name)) %}
|
||||
{% endfor %}
|
||||
|
||||
{% call statement('main') %}
|
||||
{{ snapshot_merge_sql(
|
||||
target = target_relation,
|
||||
source = staging_table,
|
||||
insert_cols = quoted_source_columns
|
||||
)
|
||||
}}
|
||||
{% endcall %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
{{ adapter.commit() }}
|
||||
|
||||
{% if staging_table is defined %}
|
||||
{% do post_snapshot(staging_table) %}
|
||||
{% endif %}
|
||||
|
||||
{% endmaterialization %}
|
||||
@@ -0,0 +1,27 @@
|
||||
|
||||
{% macro snapshot_merge_sql(target, source, insert_cols) -%}
|
||||
{{ adapter_macro('snapshot_merge_sql', target, source, insert_cols) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro default__snapshot_merge_sql(target, source, insert_cols) -%}
|
||||
{%- set insert_cols_csv = insert_cols | join(', ') -%}
|
||||
|
||||
merge into {{ target }} as DBT_INTERNAL_DEST
|
||||
using {{ source }} as DBT_INTERNAL_SOURCE
|
||||
on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id
|
||||
|
||||
when matched
|
||||
and DBT_INTERNAL_DEST.dbt_valid_to is null
|
||||
and DBT_INTERNAL_SOURCE.dbt_change_type = 'update'
|
||||
then update
|
||||
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
|
||||
|
||||
when not matched
|
||||
and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'
|
||||
then insert ({{ insert_cols_csv }})
|
||||
values ({{ insert_cols_csv }})
|
||||
;
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
{#
|
||||
Dispatch strategies by name, optionally qualified to a package
|
||||
#}
|
||||
{% macro strategy_dispatch(name) -%}
|
||||
{% set original_name = name %}
|
||||
{% if '.' in name %}
|
||||
{% set package_name, name = name.split(".", 1) %}
|
||||
{% else %}
|
||||
{% set package_name = none %}
|
||||
{% endif %}
|
||||
|
||||
{% if package_name is none %}
|
||||
{% set package_context = context %}
|
||||
{% elif package_name in context %}
|
||||
{% set package_context = context[package_name] %}
|
||||
{% else %}
|
||||
{% set error_msg %}
|
||||
Could not find package '{{package_name}}', called with '{{original_name}}'
|
||||
{% endset %}
|
||||
{{ exceptions.raise_compiler_error(error_msg | trim) }}
|
||||
{% endif %}
|
||||
|
||||
{%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%}
|
||||
|
||||
{% if search_name not in package_context %}
|
||||
{% set error_msg %}
|
||||
The specified strategy macro '{{name}}' was not found in package '{{ package_name }}'
|
||||
{% endset %}
|
||||
{{ exceptions.raise_compiler_error(error_msg | trim) }}
|
||||
{% endif %}
|
||||
{{ return(package_context[search_name]) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{#
|
||||
Create SCD Hash SQL fields cross-db
|
||||
#}
|
||||
{% macro snapshot_hash_arguments(args) -%}
|
||||
{{ adapter_macro('snapshot_hash_arguments', args) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro default__snapshot_hash_arguments(args) -%}
|
||||
md5({%- for arg in args -%}
|
||||
coalesce(cast({{ arg }} as varchar ), '')
|
||||
{% if not loop.last %} || '|' || {% endif %}
|
||||
{%- endfor -%})
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{#
|
||||
Get the current time cross-db
|
||||
#}
|
||||
{% macro snapshot_get_time() -%}
|
||||
{{ adapter_macro('snapshot_get_time') }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__snapshot_get_time() -%}
|
||||
{{ current_timestamp() }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{#
|
||||
Core strategy definitions
|
||||
#}
|
||||
{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}
|
||||
{% set primary_key = config['unique_key'] %}
|
||||
{% set updated_at = config['updated_at'] %}
|
||||
|
||||
{% set row_changed_expr -%}
|
||||
({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }})
|
||||
{%- endset %}
|
||||
|
||||
{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
|
||||
|
||||
{% do return({
|
||||
"unique_key": primary_key,
|
||||
"updated_at": updated_at,
|
||||
"row_changed": row_changed_expr,
|
||||
"scd_id": scd_id_expr
|
||||
}) %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}
|
||||
{% set check_cols_config = config['check_cols'] %}
|
||||
{% set primary_key = config['unique_key'] %}
|
||||
{% set updated_at = snapshot_get_time() %}
|
||||
|
||||
{% if check_cols_config == 'all' %}
|
||||
{% set check_cols = get_columns_in_query(node['injected_sql']) %}
|
||||
{% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}
|
||||
{% set check_cols = check_cols_config %}
|
||||
{% else %}
|
||||
{% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %}
|
||||
{% endif %}
|
||||
|
||||
{% set row_changed_expr -%}
|
||||
(
|
||||
{% for col in check_cols %}
|
||||
{{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}
|
||||
or
|
||||
({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null)
|
||||
{%- if not loop.last %} or {% endif %}
|
||||
|
||||
{% endfor %}
|
||||
)
|
||||
{%- endset %}
|
||||
|
||||
{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
|
||||
|
||||
{% do return({
|
||||
"unique_key": primary_key,
|
||||
"updated_at": updated_at,
|
||||
"row_changed": row_changed_expr,
|
||||
"scd_id": scd_id_expr
|
||||
}) %}
|
||||
{% endmacro %}
|
||||
@@ -0,0 +1,59 @@
|
||||
{% materialization table, default %}
|
||||
{%- set identifier = model['alias'] -%}
|
||||
{%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}
|
||||
{%- set backup_identifier = model['name'] + '__dbt_backup' -%}
|
||||
|
||||
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
|
||||
{%- set target_relation = api.Relation.create(identifier=identifier,
|
||||
schema=schema,
|
||||
database=database,
|
||||
type='table') -%}
|
||||
{%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,
|
||||
schema=schema,
|
||||
database=database,
|
||||
type='table') -%}
|
||||
|
||||
/*
|
||||
See ../view/view.sql for more information about this relation.
|
||||
*/
|
||||
{%- set backup_relation_type = 'table' if old_relation is none else old_relation.type -%}
|
||||
{%- set backup_relation = api.Relation.create(identifier=backup_identifier,
|
||||
schema=schema,
|
||||
database=database,
|
||||
type=backup_relation_type) -%}
|
||||
|
||||
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
|
||||
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
|
||||
|
||||
|
||||
-- drop the temp relations if they exist for some reason
|
||||
{{ adapter.drop_relation(intermediate_relation) }}
|
||||
{{ adapter.drop_relation(backup_relation) }}
|
||||
|
||||
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
||||
|
||||
-- `BEGIN` happens here:
|
||||
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
||||
|
||||
-- build model
|
||||
{% call statement('main') -%}
|
||||
{{ create_table_as(False, intermediate_relation, sql) }}
|
||||
{%- endcall %}
|
||||
|
||||
-- cleanup
|
||||
{% if old_relation is not none %}
|
||||
{{ adapter.rename_relation(target_relation, backup_relation) }}
|
||||
{% endif %}
|
||||
|
||||
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
-- `COMMIT` happens here
|
||||
{{ adapter.commit() }}
|
||||
|
||||
-- finally, drop the existing/backup relation after the commit
|
||||
{{ drop_relation_if_exists(backup_relation) }}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
||||
{% endmaterialization %}
|
||||
@@ -0,0 +1,61 @@
|
||||
|
||||
{% macro handle_existing_table(full_refresh, old_relation) %}
|
||||
{{ adapter_macro("dbt.handle_existing_table", full_refresh, old_relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro default__handle_existing_table(full_refresh, old_relation) %}
|
||||
{{ adapter.drop_relation(old_relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{# /*
|
||||
Core materialization implementation. BigQuery and Snowflake are similar
|
||||
because both can use `create or replace view` where the resulting view schema
|
||||
is not necessarily the same as the existing view. On Redshift, this would
|
||||
result in: ERROR: cannot change number of columns in view
|
||||
|
||||
This implementation is superior to the create_temp, swap_with_existing, drop_old
|
||||
paradigm because transactions don't run DDL queries atomically on Snowflake. By using
|
||||
`create or replace view`, the materialization becomes atomic in nature.
|
||||
*/
|
||||
#}
|
||||
|
||||
{% macro create_or_replace_view(run_outside_transaction_hooks=True) %}
|
||||
{%- set identifier = model['alias'] -%}
|
||||
|
||||
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
|
||||
|
||||
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
|
||||
|
||||
{%- set target_relation = api.Relation.create(
|
||||
identifier=identifier, schema=schema, database=database,
|
||||
type='view') -%}
|
||||
|
||||
{% if run_outside_transaction_hooks %}
|
||||
-- no transactions on BigQuery
|
||||
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
||||
{% endif %}
|
||||
|
||||
-- `BEGIN` happens here on Snowflake
|
||||
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
||||
|
||||
-- If there's a table with the same name and we weren't told to full refresh,
|
||||
-- that's an error. If we were told to full refresh, drop it. This behavior differs
|
||||
-- for Snowflake and BigQuery, so multiple dispatch is used.
|
||||
{%- if old_relation is not none and old_relation.is_table -%}
|
||||
{{ handle_existing_table(flags.FULL_REFRESH, old_relation) }}
|
||||
{%- endif -%}
|
||||
|
||||
-- build model
|
||||
{% call statement('main') -%}
|
||||
{{ create_view_as(target_relation, sql) }}
|
||||
{%- endcall %}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
{{ adapter.commit() }}
|
||||
|
||||
{% if run_outside_transaction_hooks %}
|
||||
-- No transactions on BigQuery
|
||||
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
@@ -0,0 +1,62 @@
|
||||
{%- materialization view, default -%}
|
||||
|
||||
{%- set identifier = model['alias'] -%}
|
||||
{%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}
|
||||
{%- set backup_identifier = model['name'] + '__dbt_backup' -%}
|
||||
|
||||
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
|
||||
{%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database,
|
||||
type='view') -%}
|
||||
{%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,
|
||||
schema=schema, database=database, type='view') -%}
|
||||
|
||||
/*
|
||||
This relation (probably) doesn't exist yet. If it does exist, it's a leftover from
|
||||
a previous run, and we're going to try to drop it immediately. At the end of this
|
||||
materialization, we're going to rename the "old_relation" to this identifier,
|
||||
and then we're going to drop it. In order to make sure we run the correct one of:
|
||||
- drop view ...
|
||||
- drop table ...
|
||||
|
||||
We need to set the type of this relation to be the type of the old_relation, if it exists,
|
||||
or else "view" as a sane default if it does not. Note that if the old_relation does not
|
||||
exist, then there is nothing to move out of the way and subsequently drop. In that case,
|
||||
this relation will be effectively unused.
|
||||
*/
|
||||
{%- set backup_relation_type = 'view' if old_relation is none else old_relation.type -%}
|
||||
{%- set backup_relation = api.Relation.create(identifier=backup_identifier,
|
||||
schema=schema, database=database,
|
||||
type=backup_relation_type) -%}
|
||||
|
||||
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
|
||||
|
||||
{{ run_hooks(pre_hooks, inside_transaction=False) }}
|
||||
|
||||
-- drop the temp relations if they exist for some reason
|
||||
{{ adapter.drop_relation(intermediate_relation) }}
|
||||
{{ adapter.drop_relation(backup_relation) }}
|
||||
|
||||
-- `BEGIN` happens here:
|
||||
{{ run_hooks(pre_hooks, inside_transaction=True) }}
|
||||
|
||||
-- build model
|
||||
{% call statement('main') -%}
|
||||
{{ create_view_as(intermediate_relation, sql) }}
|
||||
{%- endcall %}
|
||||
|
||||
-- cleanup
|
||||
-- move the existing view out of the way
|
||||
{% if old_relation is not none %}
|
||||
{{ adapter.rename_relation(target_relation, backup_relation) }}
|
||||
{% endif %}
|
||||
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=True) }}
|
||||
|
||||
{{ adapter.commit() }}
|
||||
|
||||
{{ drop_relation_if_exists(backup_relation) }}
|
||||
|
||||
{{ run_hooks(post_hooks, inside_transaction=False) }}
|
||||
|
||||
{%- endmaterialization -%}
|
||||
126 core/dbt/include/index.html Normal file
File diff suppressed because one or more lines are too long
272 core/dbt/linker.py Normal file
@@ -0,0 +1,272 @@
|
||||
import networkx as nx
|
||||
import threading
|
||||
|
||||
from dbt.compat import PriorityQueue
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
|
||||
GRAPH_SERIALIZE_BLACKLIST = [
|
||||
'agate_table'
|
||||
]
|
||||
|
||||
|
||||
def from_file(graph_file):
|
||||
linker = Linker()
|
||||
linker.read_graph(graph_file)
|
||||
|
||||
return linker
|
||||
|
||||
|
||||
def is_blocking_dependency(node):
|
||||
return node.resource_type == NodeType.Model
|
||||
|
||||
|
||||
class GraphQueue(object):
|
||||
"""A fancy queue that is backed by the dependency graph.
|
||||
Note: this will mutate input!
|
||||
|
||||
This queue is thread-safe for `mark_done` calls, though you must ensure
|
||||
that separate threads do not call `.empty()` or `__len__()` and `.get()` at
|
||||
the same time, as there is an unlocked race!
|
||||
"""
|
||||
def __init__(self, graph, manifest):
|
||||
self.graph = graph
|
||||
self.manifest = manifest
|
||||
# store the queue as a priority queue.
|
||||
self.inner = PriorityQueue()
|
||||
# things that have been popped off the queue but not finished
|
||||
# and worker thread reservations
|
||||
self.in_progress = set()
|
||||
# things that are in the queue
|
||||
self.queued = set()
|
||||
# this lock controls most things
|
||||
self.lock = threading.Lock()
|
||||
# store the 'score' of each node as a number. Lower is higher priority.
|
||||
self._scores = self._calculate_scores()
|
||||
# populate the initial queue
|
||||
self._find_new_additions()
|
||||
|
||||
def get_node(self, node_id):
|
||||
return self.manifest.nodes[node_id]
|
||||
|
||||
def _include_in_cost(self, node_id):
|
||||
node = self.get_node(node_id)
|
||||
if not is_blocking_dependency(node):
|
||||
return False
|
||||
if node.get_materialization() == 'ephemeral':
|
||||
return False
|
||||
return True
|
||||
|
||||
def _calculate_scores(self):
|
||||
"""Calculate the 'value' of each node in the graph based on how many
|
||||
blocking descendants it has. We use this score for the internal
|
||||
priority queue's ordering, so the quality of this metric is important.
|
||||
|
||||
The score is stored as a negative number because the internal
|
||||
PriorityQueue picks lowest values first.
|
||||
|
||||
We could do this in one pass over the graph instead of len(self.graph)
|
||||
passes but this is easy. For large graphs this may hurt performance.
|
||||
|
||||
This operates on the graph, so it would require a lock if called from
|
||||
outside __init__.
|
||||
|
||||
:return Dict[str, int]: The score dict, mapping unique IDs to integer
|
||||
scores. Lower scores are higher priority.
|
||||
"""
|
||||
scores = {}
|
||||
for node in self.graph.nodes():
|
||||
score = -1 * len([
|
||||
d for d in nx.descendants(self.graph, node)
|
||||
if self._include_in_cost(d)
|
||||
])
|
||||
scores[node] = score
|
||||
return scores
|
||||
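# Illustrative sketch, not part of linker.py: the descendant-count scoring above
# on a toy graph. Unlike the real method, every node counts toward the score here.
import networkx as nx

toy = nx.DiGraph()
toy.add_edges_from([('a', 'b'), ('b', 'c'), ('b', 'd')])   # a feeds b; b feeds c and d
toy_scores = {n: -len(nx.descendants(toy, n)) for n in toy.nodes()}
print(toy_scores)   # {'a': -3, 'b': -2, 'c': 0, 'd': 0} -- 'a' unblocks the most work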
|
||||
def get(self, block=True, timeout=None):
|
||||
"""Get a node off the inner priority queue. By default, this blocks.
|
||||
|
||||
This takes the lock, but only for part of it.
|
||||
|
||||
:param bool block: If True, block until the inner queue has data
|
||||
:param Optional[float] timeout: If set, block for timeout seconds
|
||||
waiting for data.
|
||||
:return ParsedNode: The node as present in the manifest.
|
||||
|
||||
See `queue.PriorityQueue` for more information on `get()` behavior and
|
||||
exceptions.
|
||||
"""
|
||||
_, node_id = self.inner.get(block=block, timeout=timeout)
|
||||
with self.lock:
|
||||
self._mark_in_progress(node_id)
|
||||
return self.get_node(node_id)
|
||||
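# Illustrative sketch, not part of linker.py: why the scores are negated. The
# standard-library PriorityQueue (py3 `queue` module; dbt.compat wraps the
# py2/py3 import) hands back the smallest tuple first, so more-negative scores
# come out earlier.
from queue import PriorityQueue

pq = PriorityQueue()
pq.put((-3, 'model_a'))
pq.put((0, 'model_c'))
pq.put((-2, 'model_b'))
print(pq.get())   # (-3, 'model_a')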
|
||||
def __len__(self):
|
||||
"""The length of the queue is the number of tasks left for the queue to
|
||||
give out, regardless of where they are. Incomplete tasks are not part
|
||||
of the length.
|
||||
|
||||
This takes the lock.
|
||||
"""
|
||||
with self.lock:
|
||||
return len(self.graph) - len(self.in_progress)
|
||||
|
||||
def empty(self):
|
||||
"""The graph queue is 'empty' if it all remaining nodes in the graph
|
||||
are in progress.
|
||||
|
||||
This takes the lock.
|
||||
"""
|
||||
return len(self) == 0
|
||||
|
||||
def _already_known(self, node):
|
||||
"""Decide if a node is already known (either handed out as a task, or
|
||||
in the queue).
|
||||
|
||||
Callers must hold the lock.
|
||||
|
||||
:param str node: The node ID to check
|
||||
:returns bool: If the node is in progress/queued.
|
||||
"""
|
||||
return node in self.in_progress or node in self.queued
|
||||
|
||||
def _find_new_additions(self):
|
||||
"""Find any nodes in the graph that need to be added to the internal
|
||||
queue and add them.
|
||||
|
||||
Callers must hold the lock.
|
||||
"""
|
||||
for node, in_degree in dict(self.graph.in_degree()).items():
|
||||
if not self._already_known(node) and in_degree == 0:
|
||||
self.inner.put((self._scores[node], node))
|
||||
self.queued.add(node)
|
||||
|
||||
def mark_done(self, node_id):
|
||||
"""Given a node's unique ID, mark it as done.
|
||||
|
||||
This method takes the lock.
|
||||
|
||||
:param str node_id: The node ID to mark as complete.
|
||||
"""
|
||||
with self.lock:
|
||||
self.in_progress.remove(node_id)
|
||||
self.graph.remove_node(node_id)
|
||||
self._find_new_additions()
|
||||
self.inner.task_done()
|
||||
|
||||
def _mark_in_progress(self, node_id):
|
||||
"""Mark the node as 'in progress'.
|
||||
|
||||
Callers must hold the lock.
|
||||
|
||||
:param str node_id: The node ID to mark as in progress.
|
||||
"""
|
||||
self.queued.remove(node_id)
|
||||
self.in_progress.add(node_id)
|
||||
|
||||
def join(self):
|
||||
"""Join the queue. Blocks until all tasks are marked as done.
|
||||
|
||||
Make sure not to call this before the queue reports that it is empty.
|
||||
"""
|
||||
self.inner.join()
|
||||
|
||||
|
||||
def _subset_graph(graph, include_nodes):
|
||||
"""Create and return a new graph that is a shallow copy of graph but with
|
||||
only the nodes in include_nodes. Transitive edges across removed nodes are
|
||||
preserved as explicit new edges.
|
||||
"""
|
||||
new_graph = nx.algorithms.transitive_closure(graph)
|
||||
|
||||
include_nodes = set(include_nodes)
|
||||
|
||||
for node in graph.nodes():
|
||||
if node not in include_nodes:
|
||||
new_graph.remove_node(node)
|
||||
|
||||
for node in include_nodes:
|
||||
if node not in new_graph:
|
||||
raise RuntimeError(
|
||||
"Couldn't find model '{}' -- does it exist or is "
|
||||
"it disabled?".format(node)
|
||||
)
|
||||
return new_graph
|
||||
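# Illustrative sketch, not part of linker.py: the trick _subset_graph relies on.
# Taking the transitive closure first keeps dependencies that pass through
# removed nodes.
import networkx as nx

chain = nx.DiGraph([('a', 'b'), ('b', 'c')])
closed = nx.transitive_closure(chain)   # adds the implied edge a -> c
closed.remove_node('b')                 # subset down to {'a', 'c'}
print(list(closed.edges()))             # [('a', 'c')] -- the dependency survives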
|
||||
|
||||
class Linker(object):
|
||||
def __init__(self, data=None):
|
||||
if data is None:
|
||||
data = {}
|
||||
self.graph = nx.DiGraph(**data)
|
||||
|
||||
def edges(self):
|
||||
return self.graph.edges()
|
||||
|
||||
def nodes(self):
|
||||
return self.graph.nodes()
|
||||
|
||||
def find_cycles(self):
|
||||
# There's a networkx find_cycle function, but there's a bug in the
|
||||
# nx 1.11 release that prevents us from using it. We should use that
|
||||
# function when we upgrade to 2.X. More info:
|
||||
# https://github.com/networkx/networkx/pull/2473
|
||||
cycles = list(nx.simple_cycles(self.graph))
|
||||
|
||||
if len(cycles) > 0:
|
||||
cycle_nodes = cycles[0]
|
||||
cycle_nodes.append(cycle_nodes[0])
|
||||
return " --> ".join(cycle_nodes)
|
||||
|
||||
return None
|
||||
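# Illustrative sketch, not part of linker.py: what find_cycles reports for a
# circular dependency (node names are made up).
import networkx as nx

looped = nx.DiGraph([('a', 'b'), ('b', 'a')])
cycle = list(nx.simple_cycles(looped))[0]
cycle.append(cycle[0])
print(' --> '.join(cycle))   # e.g. 'a --> b --> a'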
|
||||
def as_graph_queue(self, manifest, limit_to=None):
|
||||
"""Returns a queue over nodes in the graph that tracks progress of
|
||||
dependencies.
|
||||
"""
|
||||
if limit_to is None:
|
||||
graph_nodes = self.graph.nodes()
|
||||
else:
|
||||
graph_nodes = limit_to
|
||||
|
||||
new_graph = _subset_graph(self.graph, graph_nodes)
|
||||
return GraphQueue(new_graph, manifest)
|
||||
|
||||
def get_dependent_nodes(self, node):
|
||||
return nx.descendants(self.graph, node)
|
||||
|
||||
def dependency(self, node1, node2):
|
||||
"indicate that node1 depends on node2"
|
||||
self.graph.add_node(node1)
|
||||
self.graph.add_node(node2)
|
||||
self.graph.add_edge(node2, node1)
|
||||
|
||||
def add_node(self, node):
|
||||
self.graph.add_node(node)
|
||||
|
||||
def remove_node(self, node):
|
||||
children = nx.descendants(self.graph, node)
|
||||
self.graph.remove_node(node)
|
||||
return children
|
||||
|
||||
def write_graph(self, outfile, manifest):
|
||||
"""Write the graph to a gpickle file. Before doing so, serialize and
|
||||
include all nodes in their corresponding graph entries.
|
||||
"""
|
||||
out_graph = _updated_graph(self.graph, manifest)
|
||||
nx.write_gpickle(out_graph, outfile)
|
||||
|
||||
def read_graph(self, infile):
|
||||
self.graph = nx.read_gpickle(infile)
|
||||
|
||||
|
||||
def _updated_graph(graph, manifest):
|
||||
graph = graph.copy()
|
||||
for node_id in graph.nodes():
|
||||
# serialize() removes the agate table
|
||||
data = manifest.nodes[node_id].serialize()
|
||||
for key in GRAPH_SERIALIZE_BLACKLIST:
|
||||
if key in data:
|
||||
del data[key]
|
||||
graph.add_node(node_id, **data)
|
||||
return graph
|
||||
core/dbt/links.py (new file, 3 lines)
@@ -0,0 +1,3 @@
ProfileConfigDocs = 'https://docs.getdbt.com/docs/configure-your-profile'
SnowflakeQuotingDocs = 'https://docs.getdbt.com/v0.10/docs/configuring-quoting'
IncrementalDocs = 'https://docs.getdbt.com/docs/configuring-incremental-models'
core/dbt/loader.py (new file, 265 lines)
@@ -0,0 +1,265 @@
|
||||
import os
|
||||
import itertools
|
||||
|
||||
from dbt.include.global_project import PACKAGES
|
||||
import dbt.exceptions
|
||||
import dbt.flags
|
||||
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.utils import timestring
|
||||
|
||||
from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \
|
||||
DocumentationParser, DataTestParser, HookParser, SchemaParser, \
|
||||
ParserUtils, SnapshotParser
|
||||
|
||||
from dbt.contracts.project import ProjectList
|
||||
|
||||
|
||||
class GraphLoader(object):
|
||||
def __init__(self, root_project, all_projects):
|
||||
self.root_project = root_project
|
||||
self.all_projects = all_projects
|
||||
self.nodes = {}
|
||||
self.docs = {}
|
||||
self.macros = {}
|
||||
self.tests = {}
|
||||
self.patches = {}
|
||||
self.disabled = []
|
||||
self.macro_manifest = None
|
||||
|
||||
def _load_sql_nodes(self, parser_type, resource_type, relative_dirs_attr,
|
||||
**kwargs):
|
||||
parser = parser_type(self.root_project, self.all_projects,
|
||||
self.macro_manifest)
|
||||
|
||||
for project_name, project in self.all_projects.items():
|
||||
parse_results = parser.load_and_parse(
|
||||
package_name=project_name,
|
||||
root_dir=project.project_root,
|
||||
relative_dirs=getattr(project, relative_dirs_attr),
|
||||
resource_type=resource_type,
|
||||
**kwargs
|
||||
)
|
||||
self.nodes.update(parse_results.parsed)
|
||||
self.disabled.extend(parse_results.disabled)
|
||||
|
||||
def _load_macros(self, internal_manifest=None):
|
||||
# skip any projects in the internal manifest
|
||||
all_projects = self.all_projects.copy()
|
||||
if internal_manifest is not None:
|
||||
for name in internal_project_names():
|
||||
all_projects.pop(name, None)
|
||||
self.macros.update(internal_manifest.macros)
|
||||
|
||||
# give the macroparser all projects but then only load what we haven't
|
||||
# loaded already
|
||||
parser = MacroParser(self.root_project, self.all_projects)
|
||||
for project_name, project in all_projects.items():
|
||||
self.macros.update(parser.load_and_parse(
|
||||
package_name=project_name,
|
||||
root_dir=project.project_root,
|
||||
relative_dirs=project.macro_paths,
|
||||
resource_type=NodeType.Macro,
|
||||
))
|
||||
|
||||
def _load_seeds(self):
|
||||
parser = SeedParser(self.root_project, self.all_projects,
|
||||
self.macro_manifest)
|
||||
for project_name, project in self.all_projects.items():
|
||||
self.nodes.update(parser.load_and_parse(
|
||||
package_name=project_name,
|
||||
root_dir=project.project_root,
|
||||
relative_dirs=project.data_paths,
|
||||
))
|
||||
|
||||
def _load_nodes(self):
|
||||
self._load_sql_nodes(ModelParser, NodeType.Model, 'source_paths')
|
||||
self._load_sql_nodes(SnapshotParser, NodeType.Snapshot,
|
||||
'snapshot_paths')
|
||||
self._load_sql_nodes(AnalysisParser, NodeType.Analysis,
|
||||
'analysis_paths')
|
||||
self._load_sql_nodes(DataTestParser, NodeType.Test, 'test_paths',
|
||||
tags=['data'])
|
||||
|
||||
hook_parser = HookParser(self.root_project, self.all_projects,
|
||||
self.macro_manifest)
|
||||
self.nodes.update(hook_parser.load_and_parse())
|
||||
|
||||
self._load_seeds()
|
||||
|
||||
def _load_docs(self):
|
||||
parser = DocumentationParser(self.root_project, self.all_projects)
|
||||
for project_name, project in self.all_projects.items():
|
||||
self.docs.update(parser.load_and_parse(
|
||||
package_name=project_name,
|
||||
root_dir=project.project_root,
|
||||
relative_dirs=project.docs_paths
|
||||
))
|
||||
|
||||
def _load_schema_tests(self):
|
||||
parser = SchemaParser(self.root_project, self.all_projects,
|
||||
self.macro_manifest)
|
||||
for project_name, project in self.all_projects.items():
|
||||
tests, patches, sources = parser.load_and_parse(
|
||||
package_name=project_name,
|
||||
root_dir=project.project_root,
|
||||
relative_dirs=project.source_paths
|
||||
)
|
||||
|
||||
for unique_id, test in tests.items():
|
||||
if unique_id in self.tests:
|
||||
dbt.exceptions.raise_duplicate_resource_name(
|
||||
test, self.tests[unique_id],
|
||||
)
|
||||
self.tests[unique_id] = test
|
||||
|
||||
for unique_id, source in sources.items():
|
||||
if unique_id in self.nodes:
|
||||
dbt.exceptions.raise_duplicate_resource_name(
|
||||
source, self.nodes[unique_id],
|
||||
)
|
||||
self.nodes[unique_id] = source
|
||||
|
||||
for name, patch in patches.items():
|
||||
if name in self.patches:
|
||||
dbt.exceptions.raise_duplicate_patch_name(
|
||||
name, patch, self.patches[name]
|
||||
)
|
||||
self.patches[name] = patch
|
||||
|
||||
def load(self, internal_manifest=None):
|
||||
self._load_macros(internal_manifest=internal_manifest)
|
||||
# make a manifest with just the macros to get the context
|
||||
self.macro_manifest = Manifest(macros=self.macros, nodes={}, docs={},
|
||||
generated_at=timestring(), disabled=[])
|
||||
self._load_nodes()
|
||||
self._load_docs()
|
||||
self._load_schema_tests()
|
||||
|
||||
def create_manifest(self):
|
||||
manifest = Manifest(
|
||||
nodes=self.nodes,
|
||||
macros=self.macros,
|
||||
docs=self.docs,
|
||||
generated_at=timestring(),
|
||||
config=self.root_project,
|
||||
disabled=self.disabled
|
||||
)
|
||||
manifest.add_nodes(self.tests)
|
||||
manifest.patch_nodes(self.patches)
|
||||
manifest = ParserUtils.process_sources(manifest, self.root_project)
|
||||
manifest = ParserUtils.process_refs(manifest,
|
||||
self.root_project.project_name)
|
||||
manifest = ParserUtils.process_docs(manifest, self.root_project)
|
||||
return manifest
|
||||
|
||||
@classmethod
|
||||
def _load_from_projects(cls, root_config, projects, internal_manifest):
|
||||
if dbt.flags.STRICT_MODE:
|
||||
ProjectList(**projects)
|
||||
|
||||
loader = cls(root_config, projects)
|
||||
loader.load(internal_manifest=internal_manifest)
|
||||
return loader.create_manifest()
|
||||
|
||||
@classmethod
|
||||
def load_all(cls, root_config, internal_manifest=None):
|
||||
projects = load_all_projects(root_config)
|
||||
manifest = cls._load_from_projects(root_config, projects,
|
||||
internal_manifest)
|
||||
_check_manifest(manifest, root_config)
|
||||
return manifest
|
||||
|
||||
@classmethod
|
||||
def load_internal(cls, root_config):
|
||||
projects = load_internal_projects(root_config)
|
||||
return cls._load_from_projects(root_config, projects, None)
|
||||
|
||||
|
||||
def _check_resource_uniqueness(manifest):
|
||||
names_resources = {}
|
||||
alias_resources = {}
|
||||
|
||||
for resource, node in manifest.nodes.items():
|
||||
if node.resource_type not in NodeType.refable():
|
||||
continue
|
||||
|
||||
name = node.name
|
||||
alias = "{}.{}".format(node.schema, node.alias)
|
||||
|
||||
existing_node = names_resources.get(name)
|
||||
if existing_node is not None:
|
||||
dbt.exceptions.raise_duplicate_resource_name(
|
||||
existing_node, node
|
||||
)
|
||||
|
||||
existing_alias = alias_resources.get(alias)
|
||||
if existing_alias is not None:
|
||||
dbt.exceptions.raise_ambiguous_alias(
|
||||
existing_alias, node
|
||||
)
|
||||
|
||||
names_resources[name] = node
|
||||
alias_resources[alias] = node
|
||||
|
||||
|
||||
def _warn_for_unused_resource_config_paths(manifest, config):
|
||||
resource_fqns = manifest.get_resource_fqns()
|
||||
disabled_fqns = [n.fqn for n in manifest.disabled]
|
||||
config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns)
|
||||
|
||||
|
||||
def _check_manifest(manifest, config):
|
||||
_check_resource_uniqueness(manifest)
|
||||
_warn_for_unused_resource_config_paths(manifest, config)
|
||||
|
||||
|
||||
def internal_project_names():
|
||||
return iter(PACKAGES.values())
|
||||
|
||||
|
||||
def _load_projects(config, paths):
|
||||
for path in paths:
|
||||
try:
|
||||
project = config.new_project(path)
|
||||
except dbt.exceptions.DbtProjectError as e:
|
||||
raise dbt.exceptions.DbtProjectError(
|
||||
'Failed to read package at {}: {}'
|
||||
.format(path, e)
|
||||
)
|
||||
else:
|
||||
yield project.project_name, project
|
||||
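# Illustrative sketch, not part of loader.py: the try/except/else shape used in
# _load_projects. The `else` branch only runs when parsing did not raise, so a
# bad package becomes a re-raised error instead of a half-yielded entry.
def load_ints(raw_values):
    for raw in raw_values:
        try:
            value = int(raw)
        except ValueError as e:
            raise ValueError('Failed to read value {}: {}'.format(raw, e))
        else:
            yield value

print(list(load_ints(['1', '2'])))   # [1, 2]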
|
||||
|
||||
def _project_directories(config):
|
||||
root = os.path.join(config.project_root, config.modules_path)
|
||||
|
||||
dependencies = []
|
||||
if os.path.exists(root):
|
||||
dependencies = os.listdir(root)
|
||||
|
||||
for name in dependencies:
|
||||
full_obj = os.path.join(root, name)
|
||||
|
||||
if not os.path.isdir(full_obj) or name.startswith('__'):
|
||||
# exclude non-dirs and dirs that start with __
|
||||
# the latter could be something like __pycache__
|
||||
# for the global dbt modules dir
|
||||
continue
|
||||
|
||||
yield full_obj
|
||||
|
||||
|
||||
def load_all_projects(config):
|
||||
all_projects = {config.project_name: config}
|
||||
project_paths = itertools.chain(
|
||||
internal_project_names(),
|
||||
_project_directories(config)
|
||||
)
|
||||
all_projects.update(_load_projects(config, project_paths))
|
||||
return all_projects
|
||||
|
||||
|
||||
def load_internal_projects(config):
|
||||
return dict(_load_projects(config, internal_project_names()))
|
||||
core/dbt/logger.py (new file, 229 lines)
@@ -0,0 +1,229 @@
|
||||
import dbt.compat
|
||||
import dbt.flags
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
import sys
|
||||
|
||||
import colorama
|
||||
|
||||
# Colorama needs some help on windows because we're using logger.info
|
||||
# instead of print(). If the Windows env doesn't have a TERM var set,
|
||||
# then we should override the logging stream to use the colorama
|
||||
# converter. If the TERM var is set (as with Git Bash), then it's safe
|
||||
# to send escape characters and no log handler injection is needed.
|
||||
colorama_stdout = sys.stdout
|
||||
colorama_wrap = True
|
||||
|
||||
colorama.init(wrap=colorama_wrap)
|
||||
|
||||
DEBUG = logging.DEBUG
|
||||
NOTICE = 15
|
||||
INFO = logging.INFO
|
||||
WARNING = logging.WARNING
|
||||
ERROR = logging.ERROR
|
||||
CRITICAL = logging.CRITICAL
|
||||
|
||||
logging.addLevelName(NOTICE, 'NOTICE')
|
||||
|
||||
|
||||
class Logger(logging.Logger):
|
||||
def notice(self, msg, *args, **kwargs):
|
||||
if self.isEnabledFor(NOTICE):
|
||||
self._log(NOTICE, msg, args, **kwargs)
|
||||
|
||||
|
||||
logging.setLoggerClass(Logger)
|
||||
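# Illustrative sketch, not part of logger.py: a custom level slotted between
# DEBUG (10) and INFO (20), mirroring the NOTICE = 15 definition above.
import logging

SKETCH_NOTICE = 15
logging.addLevelName(SKETCH_NOTICE, 'NOTICE')
logging.basicConfig(level=SKETCH_NOTICE, format='%(levelname)s %(message)s')

sketch_log = logging.getLogger('notice-sketch')
sketch_log.log(SKETCH_NOTICE, 'shown: at or above the configured level')
sketch_log.debug('hidden: DEBUG (10) is below NOTICE (15)')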
|
||||
|
||||
if sys.platform == 'win32' and not os.environ.get('TERM'):
|
||||
colorama_wrap = False
|
||||
colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream
|
||||
|
||||
elif sys.platform == 'win32':
|
||||
colorama_wrap = False
|
||||
|
||||
colorama.init(wrap=colorama_wrap)
|
||||
|
||||
# create a global console logger for dbt
|
||||
stdout_handler = logging.StreamHandler(colorama_stdout)
|
||||
stdout_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||
stdout_handler.setLevel(NOTICE)
|
||||
|
||||
stderr_handler = logging.StreamHandler(sys.stderr)
|
||||
stderr_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||
stderr_handler.setLevel(WARNING)
|
||||
|
||||
|
||||
logger = logging.getLogger('dbt')
|
||||
logger.addHandler(stdout_handler)
|
||||
logger.setLevel(DEBUG)
|
||||
logging.getLogger().setLevel(CRITICAL)
|
||||
|
||||
# Quiet these down in the logs
|
||||
logging.getLogger('botocore').setLevel(INFO)
|
||||
logging.getLogger('requests').setLevel(INFO)
|
||||
logging.getLogger('urllib3').setLevel(INFO)
|
||||
logging.getLogger('google').setLevel(INFO)
|
||||
logging.getLogger('snowflake.connector').setLevel(INFO)
|
||||
logging.getLogger('parsedatetime').setLevel(INFO)
|
||||
# we never want to see werkzeug logs
|
||||
logging.getLogger('werkzeug').setLevel(CRITICAL)
|
||||
|
||||
# provide this for the cache.
|
||||
CACHE_LOGGER = logging.getLogger('dbt.cache')
|
||||
# add a dummy handler to avoid `No handlers could be found for logger`
|
||||
nothing_handler = logging.StreamHandler()
|
||||
nothing_handler.setLevel(CRITICAL)
|
||||
CACHE_LOGGER.addHandler(nothing_handler)
|
||||
# provide this for RPC connection logging
|
||||
RPC_LOGGER = logging.getLogger('dbt.rpc')
|
||||
|
||||
|
||||
# Redirect warnings through our logging setup
|
||||
# They will be logged to a file below
|
||||
logging.captureWarnings(True)
|
||||
dbt.compat.suppress_warnings()
|
||||
|
||||
initialized = False
|
||||
|
||||
|
||||
def _swap_handler(logger, old, new):
|
||||
if old in logger.handlers:
|
||||
logger.handlers.remove(old)
|
||||
if new not in logger.handlers:
|
||||
logger.addHandler(new)
|
||||
|
||||
|
||||
def log_to_stderr(logger):
|
||||
_swap_handler(logger, stdout_handler, stderr_handler)
|
||||
|
||||
|
||||
def log_to_stdout(logger):
|
||||
_swap_handler(logger, stderr_handler, stdout_handler)
|
||||
|
||||
|
||||
def make_log_dir_if_missing(log_dir):
|
||||
import dbt.clients.system
|
||||
dbt.clients.system.make_directory(log_dir)
|
||||
|
||||
|
||||
class ColorFilter(logging.Filter):
|
||||
def filter(self, record):
|
||||
subbed = dbt.compat.to_string(record.msg)
|
||||
for escape_sequence in dbt.ui.colors.COLORS.values():
|
||||
subbed = subbed.replace(escape_sequence, '')
|
||||
record.msg = subbed
|
||||
|
||||
return True
|
||||
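# Illustrative sketch, not part of logger.py: what ColorFilter does to a record
# before it reaches the log file. dbt.ui.colors holds the real escape codes; a
# made-up red code is used here.
RED = '\033[31m'
RESET = '\033[0m'

colored = RED + 'FAIL my_model' + RESET
plain = colored.replace(RED, '').replace(RESET, '')
print(plain)   # 'FAIL my_model' -- the file handler gets plain text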
|
||||
|
||||
def default_formatter():
|
||||
return logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s')
|
||||
|
||||
|
||||
def initialize_logger(debug_mode=False, path=None):
|
||||
global initialized, logger, stdout_handler, stderr_handler
|
||||
|
||||
if initialized:
|
||||
return
|
||||
|
||||
if debug_mode:
|
||||
# we'll only use one of these, but just set both up
|
||||
stdout_handler.setFormatter(default_formatter())
|
||||
stdout_handler.setLevel(DEBUG)
|
||||
stderr_handler.setFormatter(default_formatter())
|
||||
stderr_handler.setLevel(DEBUG)
|
||||
|
||||
if path is not None:
|
||||
make_log_dir_if_missing(path)
|
||||
log_path = os.path.join(path, 'dbt.log')
|
||||
|
||||
# log to directory as well
|
||||
logdir_handler = logging.handlers.TimedRotatingFileHandler(
|
||||
filename=log_path,
|
||||
when='d',
|
||||
interval=1,
|
||||
backupCount=7,
|
||||
)
|
||||
|
||||
color_filter = ColorFilter()
|
||||
logdir_handler.addFilter(color_filter)
|
||||
|
||||
logdir_handler.setFormatter(default_formatter())
|
||||
logdir_handler.setLevel(DEBUG)
|
||||
|
||||
logger.addHandler(logdir_handler)
|
||||
|
||||
# Log Python warnings to file
|
||||
warning_logger = logging.getLogger('py.warnings')
|
||||
warning_logger.addHandler(logdir_handler)
|
||||
warning_logger.setLevel(DEBUG)
|
||||
|
||||
initialized = True
|
||||
|
||||
|
||||
def logger_initialized():
|
||||
return initialized
|
||||
|
||||
|
||||
def log_cache_events(flag):
|
||||
"""Set the cache logger to propagate its messages based on the given flag.
|
||||
"""
|
||||
CACHE_LOGGER.propagate = flag
|
||||
|
||||
|
||||
GLOBAL_LOGGER = logger
|
||||
|
||||
|
||||
class QueueFormatter(logging.Formatter):
|
||||
def formatMessage(self, record):
|
||||
superself = super(QueueFormatter, self)
|
||||
if hasattr(superself, 'formatMessage'):
|
||||
# python 3.x
|
||||
return superself.formatMessage(record)
|
||||
|
||||
# python 2.x, handling weird unicode things
|
||||
try:
|
||||
return self._fmt % record.__dict__
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
record.name = record.name.decode('utf-8')
|
||||
return self._fmt % record.__dict__
|
||||
except UnicodeDecodeError as e:
|
||||
raise e
|
||||
|
||||
def format(self, record):
|
||||
record.message = record.getMessage()
|
||||
record.asctime = self.formatTime(record, self.datefmt)
|
||||
formatted = self.formatMessage(record)
|
||||
|
||||
output = {
|
||||
'message': formatted,
|
||||
'timestamp': record.asctime,
|
||||
'levelname': record.levelname,
|
||||
'level': record.levelno,
|
||||
}
|
||||
if record.exc_info:
|
||||
if not record.exc_text:
|
||||
record.exc_text = self.formatException(record.exc_info)
|
||||
output['exc_info'] = record.exc_text
|
||||
return output
|
||||
|
||||
|
||||
class QueueLogHandler(logging.Handler):
|
||||
def __init__(self, queue):
|
||||
super(QueueLogHandler, self).__init__()
|
||||
self.queue = queue
|
||||
|
||||
def emit(self, record):
|
||||
msg = self.format(record)
|
||||
self.queue.put_nowait(['log', msg])
|
||||
|
||||
|
||||
def add_queue_handler(queue):
|
||||
"""Add a queue log handler to the global logger."""
|
||||
handler = QueueLogHandler(queue)
|
||||
handler.setFormatter(QueueFormatter())
|
||||
handler.setLevel(DEBUG)
|
||||
GLOBAL_LOGGER.addHandler(handler)
|
||||
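# Illustrative sketch, not part of logger.py: the queue-handler pattern above,
# reduced to the standard library. dbt's QueueFormatter returns a dict rather
# than a string, which is what lets the handler push structured payloads.
import logging
import queue

sketch_queue = queue.Queue()

class SketchQueueHandler(logging.Handler):
    def emit(self, record):
        sketch_queue.put_nowait(['log', record.getMessage()])

rpc_log = logging.getLogger('rpc-sketch')
rpc_log.propagate = False
rpc_log.addHandler(SketchQueueHandler())
rpc_log.warning('hello')
print(sketch_queue.get_nowait())   # ['log', 'hello']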
core/dbt/main.py (new file, 811 lines)
@@ -0,0 +1,811 @@
|
||||
from dbt.logger import initialize_logger, GLOBAL_LOGGER as logger, \
|
||||
logger_initialized, log_cache_events
|
||||
|
||||
import argparse
|
||||
import os.path
|
||||
import sys
|
||||
import traceback
|
||||
from contextlib import contextmanager
|
||||
|
||||
import dbt.version
|
||||
import dbt.flags as flags
|
||||
import dbt.task.run as run_task
|
||||
import dbt.task.compile as compile_task
|
||||
import dbt.task.debug as debug_task
|
||||
import dbt.task.clean as clean_task
|
||||
import dbt.task.deps as deps_task
|
||||
import dbt.task.init as init_task
|
||||
import dbt.task.seed as seed_task
|
||||
import dbt.task.test as test_task
|
||||
import dbt.task.snapshot as snapshot_task
|
||||
import dbt.task.generate as generate_task
|
||||
import dbt.task.serve as serve_task
|
||||
import dbt.task.freshness as freshness_task
|
||||
import dbt.task.run_operation as run_operation_task
|
||||
from dbt.task.list import ListTask
|
||||
from dbt.task.migrate import MigrationTask
|
||||
from dbt.task.rpc_server import RPCServerTask
|
||||
from dbt.adapters.factory import reset_adapters
|
||||
|
||||
import dbt.tracking
|
||||
import dbt.ui.printer
|
||||
import dbt.compat
|
||||
import dbt.deprecations
|
||||
import dbt.profiler
|
||||
|
||||
from dbt.utils import ExitCodes
|
||||
from dbt.config import UserConfig, PROFILES_DIR
|
||||
from dbt.exceptions import RuntimeException
|
||||
|
||||
|
||||
PROFILES_HELP_MESSAGE = """
|
||||
For more information on configuring profiles, please consult the dbt docs:
|
||||
|
||||
https://docs.getdbt.com/docs/configure-your-profile
|
||||
"""
|
||||
|
||||
|
||||
class DBTVersion(argparse.Action):
|
||||
"""This is very very similar to the builtin argparse._Version action,
|
||||
except it just calls dbt.version.get_version_information().
|
||||
"""
|
||||
def __init__(self,
|
||||
option_strings,
|
||||
version=None,
|
||||
dest=argparse.SUPPRESS,
|
||||
default=argparse.SUPPRESS,
|
||||
help="show program's version number and exit"):
|
||||
super(DBTVersion, self).__init__(
|
||||
option_strings=option_strings,
|
||||
dest=dest,
|
||||
default=default,
|
||||
nargs=0,
|
||||
help=help)
|
||||
|
||||
def __call__(self, parser, namespace, values, option_string=None):
|
||||
formatter = parser._get_formatter()
|
||||
formatter.add_text(dbt.version.get_version_information())
|
||||
parser.exit(message=formatter.format_help())
|
||||
|
||||
|
||||
class DBTArgumentParser(argparse.ArgumentParser):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(DBTArgumentParser, self).__init__(*args, **kwargs)
|
||||
self.register('action', 'dbtversion', DBTVersion)
|
||||
|
||||
|
||||
def main(args=None):
|
||||
if args is None:
|
||||
args = sys.argv[1:]
|
||||
|
||||
try:
|
||||
results, succeeded = handle_and_check(args)
|
||||
if succeeded:
|
||||
exit_code = ExitCodes.Success
|
||||
else:
|
||||
exit_code = ExitCodes.ModelError
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("ctrl-c")
|
||||
exit_code = ExitCodes.UnhandledError
|
||||
|
||||
# This can be thrown by eg. argparse
|
||||
except SystemExit as e:
|
||||
exit_code = e.code
|
||||
|
||||
except BaseException as e:
|
||||
logger.warn("Encountered an error:")
|
||||
logger.warn(str(e))
|
||||
|
||||
if logger_initialized():
|
||||
logger.debug(traceback.format_exc())
|
||||
elif not isinstance(e, RuntimeException):
|
||||
# if it did not come from dbt proper and the logger is not
|
||||
# initialized (so there's no safe path to log to), log the stack
|
||||
# trace at error level.
|
||||
logger.error(traceback.format_exc())
|
||||
exit_code = ExitCodes.UnhandledError
|
||||
|
||||
_python2_compatibility_message()
|
||||
|
||||
sys.exit(exit_code)
|
||||
|
||||
|
||||
# here for backwards compatibility
|
||||
def handle(args):
|
||||
res, success = handle_and_check(args)
|
||||
return res
|
||||
|
||||
|
||||
def initialize_config_values(parsed):
|
||||
"""Given the parsed args, initialize the dbt tracking code.
|
||||
|
||||
It would be nice to re-use this profile later on instead of parsing it
|
||||
twice, but dbt's initialization is not structured in a way that makes that
|
||||
easy.
|
||||
"""
|
||||
try:
|
||||
cfg = UserConfig.from_directory(parsed.profiles_dir)
|
||||
except RuntimeException:
|
||||
cfg = UserConfig.from_dict(None)
|
||||
|
||||
cfg.set_values(parsed.profiles_dir)
|
||||
|
||||
|
||||
def handle_and_check(args):
|
||||
parsed = parse_args(args)
|
||||
profiler_enabled = False
|
||||
|
||||
if parsed.record_timing_info:
|
||||
profiler_enabled = True
|
||||
|
||||
with dbt.profiler.profiler(
|
||||
enable=profiler_enabled,
|
||||
outfile=parsed.record_timing_info
|
||||
):
|
||||
|
||||
initialize_config_values(parsed)
|
||||
|
||||
reset_adapters()
|
||||
|
||||
task, res = run_from_args(parsed)
|
||||
success = task.interpret_results(res)
|
||||
|
||||
return res, success
|
||||
|
||||
|
||||
@contextmanager
|
||||
def track_run(task):
|
||||
dbt.tracking.track_invocation_start(config=task.config, args=task.args)
|
||||
try:
|
||||
yield
|
||||
dbt.tracking.track_invocation_end(
|
||||
config=task.config, args=task.args, result_type="ok"
|
||||
)
|
||||
except (dbt.exceptions.NotImplementedException,
|
||||
dbt.exceptions.FailedToConnectException) as e:
|
||||
logger.error('ERROR: {}'.format(e))
|
||||
dbt.tracking.track_invocation_end(
|
||||
config=task.config, args=task.args, result_type="error"
|
||||
)
|
||||
except Exception:
|
||||
dbt.tracking.track_invocation_end(
|
||||
config=task.config, args=task.args, result_type="error"
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
dbt.tracking.flush()
|
||||
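# Illustrative sketch, not part of main.py: the try/except/finally shape of
# track_run, with prints standing in for the dbt.tracking calls.
from contextlib import contextmanager

@contextmanager
def track_run_sketch(task_name):
    print('invocation start:', task_name)
    try:
        yield
        print('invocation end: ok')
    except Exception:
        print('invocation end: error')
        raise
    finally:
        print('flush')

with track_run_sketch('run'):
    pass   # the task body would run here
# prints: invocation start: run / invocation end: ok / flush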
|
||||
|
||||
_PYTHON_27_WARNING = '''
|
||||
Python 2.7 will reach the end of its life on January 1st, 2020.
|
||||
Please upgrade your Python as Python 2.7 won't be maintained after that date.
|
||||
A future version of dbt will drop support for Python 2.7.
|
||||
'''.strip()
|
||||
|
||||
|
||||
def _python2_compatibility_message():
|
||||
if dbt.compat.WHICH_PYTHON != 2:
|
||||
return
|
||||
|
||||
logger.critical(
|
||||
dbt.ui.printer.red('DEPRECATION: ') + _PYTHON_27_WARNING
|
||||
)
|
||||
|
||||
|
||||
def run_from_args(parsed):
|
||||
log_cache_events(getattr(parsed, 'log_cache_events', False))
|
||||
flags.set_from_args(parsed)
|
||||
|
||||
parsed.cls.pre_init_hook()
|
||||
logger.info("Running with dbt{}".format(dbt.version.installed))
|
||||
|
||||
# this will convert DbtConfigErrors into RuntimeExceptions
|
||||
task = parsed.cls.from_args(args=parsed)
|
||||
logger.debug("running dbt with arguments %s", parsed)
|
||||
|
||||
log_path = None
|
||||
if task.config is not None:
|
||||
log_path = getattr(task.config, 'log_path', None)
|
||||
initialize_logger(parsed.debug, log_path)
|
||||
logger.debug("Tracking: {}".format(dbt.tracking.active_user.state()))
|
||||
|
||||
results = None
|
||||
|
||||
with track_run(task):
|
||||
results = task.run()
|
||||
|
||||
return task, results
|
||||
|
||||
|
||||
def _build_base_subparser():
|
||||
base_subparser = argparse.ArgumentParser(add_help=False)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--project-dir',
|
||||
default=None,
|
||||
type=str,
|
||||
help="""
|
||||
Which directory to look in for the dbt_project.yml file.
|
||||
Default is the current working directory and its parents.
|
||||
"""
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--profiles-dir',
|
||||
default=PROFILES_DIR,
|
||||
type=str,
|
||||
help="""
|
||||
Which directory to look in for the profiles.yml file. Default = {}
|
||||
""".format(PROFILES_DIR)
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--profile',
|
||||
required=False,
|
||||
type=str,
|
||||
help="""
|
||||
Which profile to load. Overrides setting in dbt_project.yml.
|
||||
"""
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--target',
|
||||
default=None,
|
||||
type=str,
|
||||
help='Which target to load for the given profile'
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--vars',
|
||||
type=str,
|
||||
default='{}',
|
||||
help="""
|
||||
Supply variables to the project. This argument overrides
|
||||
variables defined in your dbt_project.yml file. This argument
|
||||
should be a YAML string, eg. '{my_variable: my_value}'"""
|
||||
)
|
||||
|
||||
# if set, log all cache events. This is extremely verbose!
|
||||
base_subparser.add_argument(
|
||||
'--log-cache-events',
|
||||
action='store_true',
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--bypass-cache',
|
||||
action='store_false',
|
||||
dest='use_cache',
|
||||
help='If set, bypass the adapter-level cache of database state',
|
||||
)
|
||||
return base_subparser
|
||||
|
||||
|
||||
def _build_docs_subparser(subparsers, base_subparser):
|
||||
docs_sub = subparsers.add_parser(
|
||||
'docs',
|
||||
parents=[base_subparser],
|
||||
help="Generate or serve the documentation "
|
||||
"website for your project.")
|
||||
return docs_sub
|
||||
|
||||
|
||||
def _build_source_subparser(subparsers, base_subparser):
|
||||
source_sub = subparsers.add_parser(
|
||||
'source',
|
||||
parents=[base_subparser],
|
||||
help="Manage your project's sources")
|
||||
return source_sub
|
||||
|
||||
|
||||
def _build_init_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'init',
|
||||
parents=[base_subparser],
|
||||
help="Initialize a new DBT project.")
|
||||
sub.add_argument('project_name', type=str, help='Name of the new project')
|
||||
sub.set_defaults(cls=init_task.InitTask, which='init')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_clean_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'clean',
|
||||
parents=[base_subparser],
|
||||
help="Delete all folders in the clean-targets list"
|
||||
"\n(usually the dbt_modules and target directories.)")
|
||||
sub.set_defaults(cls=clean_task.CleanTask, which='clean')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_debug_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'debug',
|
||||
parents=[base_subparser],
|
||||
help="Show some helpful information about dbt for debugging."
|
||||
"\nNot to be confused with the --debug option which increases "
|
||||
"verbosity.")
|
||||
sub.add_argument(
|
||||
'--config-dir',
|
||||
action='store_true',
|
||||
help="""
|
||||
If specified, DBT will show path information for this project
|
||||
"""
|
||||
)
|
||||
sub.set_defaults(cls=debug_task.DebugTask, which='debug')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_deps_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'deps',
|
||||
parents=[base_subparser],
|
||||
help="Pull the most recent version of the dependencies "
|
||||
"listed in packages.yml")
|
||||
sub.set_defaults(cls=deps_task.DepsTask, which='deps')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_snapshot_subparser(subparsers, base_subparser, which='snapshot'):
|
||||
if which == 'archive':
|
||||
helpmsg = (
|
||||
'DEPRECATED: This command is deprecated and will\n'
|
||||
'be removed in a future release. Use dbt snapshot instead.'
|
||||
)
|
||||
else:
|
||||
helpmsg = 'Execute snapshots defined in your project'
|
||||
|
||||
sub = subparsers.add_parser(
|
||||
which,
|
||||
parents=[base_subparser],
|
||||
help=helpmsg)
|
||||
sub.add_argument(
|
||||
'--threads',
|
||||
type=int,
|
||||
required=False,
|
||||
help="""
|
||||
Specify number of threads to use while snapshotting tables. Overrides
|
||||
settings in profiles.yml.
|
||||
"""
|
||||
)
|
||||
sub.set_defaults(cls=snapshot_task.SnapshotTask, which=which)
|
||||
return sub
|
||||
|
||||
|
||||
def _build_run_subparser(subparsers, base_subparser):
|
||||
run_sub = subparsers.add_parser(
|
||||
'run',
|
||||
parents=[base_subparser],
|
||||
help="Compile SQL and execute against the current "
|
||||
"target database.")
|
||||
run_sub.set_defaults(cls=run_task.RunTask, which='run')
|
||||
return run_sub
|
||||
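# Illustrative sketch, not part of main.py: the set_defaults dispatch trick.
# Each sub-command stores its task class on the parsed namespace, so the caller
# can run `parsed.cls.from_args(parsed)` without a long if/elif chain.
import argparse

class RunTaskSketch:   # stand-in for dbt.task.run.RunTask
    pass

sketch_parser = argparse.ArgumentParser(prog='sketch')
sketch_subs = sketch_parser.add_subparsers(title='Available sub-commands')
sketch_run = sketch_subs.add_parser('run')
sketch_run.set_defaults(cls=RunTaskSketch, which='run')

parsed = sketch_parser.parse_args(['run'])
print(parsed.cls.__name__, parsed.which)   # RunTaskSketch run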
|
||||
|
||||
def _build_compile_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'compile',
|
||||
parents=[base_subparser],
|
||||
help="Generates executable SQL from source model, test, and"
|
||||
"analysis files. \nCompiled SQL files are written to the target/"
|
||||
"directory.")
|
||||
sub.set_defaults(cls=compile_task.CompileTask, which='compile')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_docs_generate_subparser(subparsers, base_subparser):
|
||||
# it might look like docs_sub is the correct parents entry, but that
|
||||
# will cause weird errors about 'conflicting option strings'.
|
||||
generate_sub = subparsers.add_parser('generate', parents=[base_subparser])
|
||||
generate_sub.set_defaults(cls=generate_task.GenerateTask,
|
||||
which='generate')
|
||||
generate_sub.add_argument(
|
||||
'--no-compile',
|
||||
action='store_false',
|
||||
dest='compile',
|
||||
help='Do not run "dbt compile" as part of docs generation'
|
||||
)
|
||||
return generate_sub
|
||||
|
||||
|
||||
def _add_selection_arguments(*subparsers, **kwargs):
|
||||
models_name = kwargs.get('models_name', 'models')
|
||||
for sub in subparsers:
|
||||
sub.add_argument(
|
||||
'-{}'.format(models_name[0]),
|
||||
'--{}'.format(models_name),
|
||||
dest='models',
|
||||
required=False,
|
||||
nargs='+',
|
||||
help="""
|
||||
Specify the models to include.
|
||||
"""
|
||||
)
|
||||
sub.add_argument(
|
||||
'--exclude',
|
||||
required=False,
|
||||
nargs='+',
|
||||
help="""
|
||||
Specify the models to exclude.
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def _add_table_mutability_arguments(*subparsers):
|
||||
for sub in subparsers:
|
||||
sub.add_argument(
|
||||
'--full-refresh',
|
||||
action='store_true',
|
||||
help="""
|
||||
If specified, DBT will drop incremental models and
|
||||
fully-recalculate the incremental table from the model definition.
|
||||
""")
|
||||
|
||||
|
||||
def _add_common_arguments(*subparsers):
|
||||
for sub in subparsers:
|
||||
sub.add_argument(
|
||||
'--threads',
|
||||
type=int,
|
||||
required=False,
|
||||
help="""
|
||||
Specify number of threads to use while executing models. Overrides
|
||||
settings in profiles.yml.
|
||||
"""
|
||||
)
|
||||
sub.add_argument(
|
||||
'--no-version-check',
|
||||
dest='version_check',
|
||||
action='store_false',
|
||||
help="""
|
||||
If set, skip ensuring dbt's version matches the one specified in
|
||||
the dbt_project.yml file ('require-dbt-version')
|
||||
""")
|
||||
|
||||
|
||||
def _build_seed_subparser(subparsers, base_subparser):
|
||||
seed_sub = subparsers.add_parser(
|
||||
'seed',
|
||||
parents=[base_subparser],
|
||||
help="Load data from csv files into your data warehouse.")
|
||||
seed_sub.add_argument(
|
||||
'--full-refresh',
|
||||
action='store_true',
|
||||
help='Drop existing seed tables and recreate them'
|
||||
)
|
||||
seed_sub.add_argument(
|
||||
'--show',
|
||||
action='store_true',
|
||||
help='Show a sample of the loaded data in the terminal'
|
||||
)
|
||||
seed_sub.set_defaults(cls=seed_task.SeedTask, which='seed')
|
||||
return seed_sub
|
||||
|
||||
|
||||
def _build_docs_serve_subparser(subparsers, base_subparser):
|
||||
serve_sub = subparsers.add_parser('serve', parents=[base_subparser])
|
||||
serve_sub.add_argument(
|
||||
'--port',
|
||||
default=8080,
|
||||
type=int,
|
||||
help='Specify the port number for the docs server.'
|
||||
)
|
||||
serve_sub.set_defaults(cls=serve_task.ServeTask, which='serve')
|
||||
return serve_sub
|
||||
|
||||
|
||||
def _build_test_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'test',
|
||||
parents=[base_subparser],
|
||||
help="Runs tests on data in deployed models."
|
||||
"Run this after `dbt run`")
|
||||
sub.add_argument(
|
||||
'--data',
|
||||
action='store_true',
|
||||
help='Run data tests defined in "tests" directory.'
|
||||
)
|
||||
sub.add_argument(
|
||||
'--schema',
|
||||
action='store_true',
|
||||
help='Run constraint validations from schema.yml files'
|
||||
)
|
||||
|
||||
sub.set_defaults(cls=test_task.TestTask, which='test')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_source_snapshot_freshness_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'snapshot-freshness',
|
||||
parents=[base_subparser],
|
||||
help="Snapshots the current freshness of the project's sources",
|
||||
)
|
||||
sub.add_argument(
|
||||
'-s',
|
||||
'--select',
|
||||
required=False,
|
||||
nargs='+',
|
||||
help="""
|
||||
Specify the sources to snapshot freshness
|
||||
""",
|
||||
dest='selected'
|
||||
)
|
||||
sub.add_argument(
|
||||
'-o',
|
||||
'--output',
|
||||
required=False,
|
||||
help="""
|
||||
Specify the output path for the json report. By default, outputs to
|
||||
target/sources.json
|
||||
"""
|
||||
)
|
||||
sub.add_argument(
|
||||
'--threads',
|
||||
type=int,
|
||||
required=False,
|
||||
help="""
|
||||
Specify number of threads to use. Overrides settings in profiles.yml
|
||||
"""
|
||||
)
|
||||
sub.set_defaults(cls=freshness_task.FreshnessTask,
|
||||
which='snapshot-freshness')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_rpc_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'rpc',
|
||||
parents=[base_subparser],
|
||||
help='Start a json-rpc server',
|
||||
)
|
||||
sub.add_argument(
|
||||
'--host',
|
||||
default='0.0.0.0',
|
||||
help='Specify the host to listen on for the rpc server.'
|
||||
)
|
||||
sub.add_argument(
|
||||
'--port',
|
||||
default=8580,
|
||||
type=int,
|
||||
help='Specify the port number for the rpc server.'
|
||||
)
|
||||
sub.set_defaults(cls=RPCServerTask, which='rpc')
|
||||
# the rpc task does a 'compile', so we need these attributes to exist, but
|
||||
# we don't want users to be allowed to set them.
|
||||
sub.set_defaults(models=None, exclude=None)
|
||||
return sub
|
||||
|
||||
|
||||
def _build_list_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'list',
|
||||
parents=[base_subparser],
|
||||
help='List the resources in your project'
|
||||
)
|
||||
sub.set_defaults(cls=ListTask, which='list')
|
||||
resource_values = list(ListTask.ALL_RESOURCE_VALUES) + ['default', 'all']
|
||||
sub.add_argument('--resource-type',
|
||||
choices=resource_values,
|
||||
action='append',
|
||||
default=[],
|
||||
dest='resource_types')
|
||||
sub.add_argument('--output',
|
||||
choices=['json', 'name', 'path', 'selector'],
|
||||
default='selector')
|
||||
sub.add_argument(
|
||||
'-s',
|
||||
'--select',
|
||||
required=False,
|
||||
nargs='+',
|
||||
metavar='SELECTOR',
|
||||
help="Specify the nodes to select.",
|
||||
)
|
||||
sub.add_argument(
|
||||
'-m',
|
||||
'--models',
|
||||
required=False,
|
||||
nargs='+',
|
||||
metavar='SELECTOR',
|
||||
help="Specify the models to select and set the resource-type to "
|
||||
"'model'. Mutually exclusive with '--select' (or '-s') and "
|
||||
"'--resource-type'",
|
||||
)
|
||||
sub.add_argument(
|
||||
'--exclude',
|
||||
required=False,
|
||||
nargs='+',
|
||||
metavar='SELECTOR',
|
||||
help="Specify the models to exclude."
|
||||
)
|
||||
# in python 3.x you can use the 'aliases' kwarg, but in python 2.7 you get
|
||||
# to do this
|
||||
subparsers._name_parser_map['ls'] = sub
|
||||
return sub
|
||||
|
||||
|
||||
def _build_run_operation_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'run-operation',
|
||||
parents=[base_subparser],
|
||||
help="""
|
||||
(beta) Run the named macro with any supplied arguments. This
|
||||
subcommand is unstable and subject to change in a future release
|
||||
of dbt. Please use it with caution"""
|
||||
)
|
||||
sub.add_argument(
|
||||
'macro',
|
||||
help="""
|
||||
Specify the macro to invoke. dbt will call this macro with the
|
||||
supplied arguments and then exit"""
|
||||
)
|
||||
sub.add_argument(
|
||||
'--args',
|
||||
type=str,
|
||||
default='{}',
|
||||
help="""
|
||||
Supply arguments to the macro. This dictionary will be mapped
|
||||
to the keyword arguments defined in the selected macro. This
|
||||
argument should be a YAML string, eg. '{my_variable: my_value}'"""
|
||||
)
|
||||
sub.set_defaults(cls=run_operation_task.RunOperationTask,
|
||||
which='run-operation')
|
||||
return sub
|
||||
|
||||
|
||||
def _build_snapshot_migrate_subparser(subparsers, base_subparser):
|
||||
sub = subparsers.add_parser(
|
||||
'snapshot-migrate',
|
||||
parents=[base_subparser],
|
||||
help='Run the snapshot migration script'
|
||||
)
|
||||
sub.add_argument(
|
||||
'--from-archive',
|
||||
action='store_true',
|
||||
help=('This flag is required for the 0.14.0 archive to snapshot '
|
||||
'migration')
|
||||
)
|
||||
sub.add_argument(
|
||||
'--apply-files',
|
||||
action='store_true',
|
||||
dest='write_files',
|
||||
help='If set, write .sql files to disk instead of logging them'
|
||||
)
|
||||
sub.add_argument(
|
||||
'--apply-database',
|
||||
action='store_true',
|
||||
dest='migrate_database',
|
||||
help='If set, perform just the database migration'
|
||||
)
|
||||
sub.add_argument(
|
||||
'--apply',
|
||||
action='store_true',
|
||||
help='If set, implies --apply-database --apply-files'
|
||||
)
|
||||
sub.set_defaults(cls=MigrationTask, which='migration')
|
||||
|
||||
|
||||
def parse_args(args):
|
||||
p = DBTArgumentParser(
|
||||
prog='dbt',
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
description="An ELT tool for managing your SQL "
|
||||
"transformations and data models."
|
||||
"\nFor more documentation on these commands, visit: "
|
||||
"docs.getdbt.com",
|
||||
epilog="Specify one of these sub-commands and you can "
|
||||
"find more help from there.")
|
||||
|
||||
p.add_argument(
|
||||
'--version',
|
||||
action='dbtversion',
|
||||
help="Show version information")
|
||||
|
||||
p.add_argument(
|
||||
'-r',
|
||||
'--record-timing-info',
|
||||
default=None,
|
||||
type=str,
|
||||
help="""
|
||||
When this option is passed, dbt will output low-level timing
|
||||
stats to the specified file. Example:
|
||||
`--record-timing-info output.profile`
|
||||
"""
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'-d',
|
||||
'--debug',
|
||||
action='store_true',
|
||||
help='''Display debug logging during dbt execution. Useful for
|
||||
debugging and making bug reports.''')
|
||||
|
||||
p.add_argument(
|
||||
'-S',
|
||||
'--strict',
|
||||
action='store_true',
|
||||
help='''Run schema validations at runtime. This will surface
|
||||
bugs in dbt, but may incur a performance penalty.''')
|
||||
|
||||
p.add_argument(
|
||||
'--warn-error',
|
||||
action='store_true',
|
||||
help='''If dbt would normally warn, instead raise an exception.
|
||||
Examples include --models that selects nothing, deprecations,
|
||||
configurations with no associated models, invalid test configurations,
|
||||
and missing sources/refs in tests''')
|
||||
|
||||
# if set, run dbt in single-threaded mode: thread count is ignored, and
|
||||
# calls go through `map` instead of the thread pool. This is useful for
|
||||
# getting performance information about aspects of dbt that normally run in
|
||||
# a thread, as the profiler ignores child threads. Users should really
|
||||
# never use this.
|
||||
p.add_argument(
|
||||
'--single-threaded',
|
||||
action='store_true',
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
|
||||
# if set, extract all models and blocks with the jinja block extractor, and
|
||||
# verify that we don't fail anywhere the actual jinja parser passes. The
|
||||
# reverse (passing files that end up failing jinja) is fine.
|
||||
p.add_argument(
|
||||
'--test-new-parser',
|
||||
action='store_true',
|
||||
help=argparse.SUPPRESS
|
||||
)
|
||||
|
||||
subs = p.add_subparsers(title="Available sub-commands")
|
||||
|
||||
base_subparser = _build_base_subparser()
|
||||
|
||||
# make the subcommands that have their own subcommands
|
||||
docs_sub = _build_docs_subparser(subs, base_subparser)
|
||||
docs_subs = docs_sub.add_subparsers(title="Available sub-commands")
|
||||
source_sub = _build_source_subparser(subs, base_subparser)
|
||||
source_subs = source_sub.add_subparsers(title="Available sub-commands")
|
||||
|
||||
_build_init_subparser(subs, base_subparser)
|
||||
_build_clean_subparser(subs, base_subparser)
|
||||
_build_debug_subparser(subs, base_subparser)
|
||||
_build_deps_subparser(subs, base_subparser)
|
||||
_build_list_subparser(subs, base_subparser)
|
||||
_build_snapshot_migrate_subparser(subs, base_subparser)
|
||||
|
||||
snapshot_sub = _build_snapshot_subparser(subs, base_subparser)
|
||||
archive_sub = _build_snapshot_subparser(subs, base_subparser, 'archive')
|
||||
rpc_sub = _build_rpc_subparser(subs, base_subparser)
|
||||
run_sub = _build_run_subparser(subs, base_subparser)
|
||||
compile_sub = _build_compile_subparser(subs, base_subparser)
|
||||
generate_sub = _build_docs_generate_subparser(docs_subs, base_subparser)
|
||||
test_sub = _build_test_subparser(subs, base_subparser)
|
||||
# --threads, --no-version-check
|
||||
_add_common_arguments(run_sub, compile_sub, generate_sub, test_sub,
|
||||
rpc_sub)
|
||||
# --models, --exclude
|
||||
_add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub,
|
||||
archive_sub)
|
||||
_add_selection_arguments(snapshot_sub, models_name='select')
|
||||
# --full-refresh
|
||||
_add_table_mutability_arguments(run_sub, compile_sub)
|
||||
|
||||
_build_seed_subparser(subs, base_subparser)
|
||||
_build_docs_serve_subparser(docs_subs, base_subparser)
|
||||
_build_source_snapshot_freshness_subparser(source_subs, base_subparser)
|
||||
_build_run_operation_subparser(subs, base_subparser)
|
||||
|
||||
if len(args) == 0:
|
||||
p.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
parsed = p.parse_args(args)
|
||||
parsed.profiles_dir = os.path.expanduser(parsed.profiles_dir)
|
||||
|
||||
if not hasattr(parsed, 'which'):
|
||||
# the user did not provide a valid subcommand. trigger the help message
|
||||
# and exit with an error
|
||||
p.print_help()
|
||||
p.exit(1)
|
||||
|
||||
return parsed
|
||||
Some files were not shown because too many files have changed in this diff