Compare commits

...

387 Commits

Author SHA1 Message Date
Gerda Shank
2516e83028 fix test/unit/test_context.py to use postgres profile 2021-04-27 16:08:14 -04:00
Jeremy Cohen
081f30ee2d Include parent adapters in dispatch 2021-04-26 10:28:04 -04:00
Jeremy Cohen
89e4872c21 Add test, expect fail 2021-04-26 10:24:18 -04:00
Gerda Shank
33dc970859 Merge pull request #3272 from fishtown-analytics/test_context_regression
Add necessary macros to schema test context namespace
2021-04-20 12:36:40 -04:00
Kyle Wigley
f73202734c Merge pull request #3261 from fishtown-analytics/feature/test-jinja-block
Add `test` Jinja tag
2021-04-19 09:39:53 -04:00
Jeremy Cohen
32bacdab4b Merge pull request #3270 from dmateusp/dmateusp/3268/dbt_deps_support_commit_hashes
Issue 3268: Support commit hashes in dbt deps
2021-04-18 18:17:27 -04:00
Daniel Mateus Pires
6113c3b533 📖 Add myself to contribs 2021-04-18 21:33:01 +01:00
Gerda Shank
1c634af489 Add necessary macros to schema test context namespace [#3229] [#3240] 2021-04-16 13:21:18 -04:00
Daniel Mateus Pires
428cdea2dc ✏️ Update CHANGELOG 2021-04-16 10:58:04 +01:00
Daniel Mateus Pires
f14b55f839 Add test 2021-04-16 10:54:35 +01:00
Daniel Mateus Pires
5934d263b8 Support git commit as revision 2021-04-16 10:21:49 +01:00
Kyle Wigley
3860d919e6 use ternary and f-strings 2021-04-15 14:49:07 -04:00
Kyle Wigley
fd0b9434ae update changelog 2021-04-15 14:49:07 -04:00
Kyle Wigley
efb30d0262 first pass at adding test jinja block 2021-04-15 14:48:12 -04:00
Jeremy Cohen
cee0bfbfa2 Merge pull request #3257 from fishtown-analytics/feature/test-config-parity
Feature: test config parity
2021-04-15 14:15:19 -04:00
Jeremy Cohen
dc684d31d3 Add changelog entry 2021-04-15 14:04:34 -04:00
Gerda Shank
bfdf7f01b5 Merge pull request #3248 from fishtown-analytics/load_all_files
Preload all project files at start of parsing [#3244]
2021-04-14 13:33:32 -04:00
Gerda Shank
2cc0579b6e Preload all project files at start of parsing [#3244] 2021-04-14 09:57:26 -04:00
Jeremy Cohen
bfc472dc0f Cleanup + integration tests 2021-04-13 20:04:53 -04:00
Jeremy Cohen
ea4e3680ab Configure tests from dbt_project.yml 2021-04-13 20:04:42 -04:00
Jeremy Cohen
f02139956d Add support for disabling schema tests 2021-04-13 20:04:42 -04:00
Kyle Wigley
7ec5c122e1 Merge pull request #3228 from fishtown-analytics/cleanup/makefile
Better make targets
2021-04-12 10:49:59 -04:00
Jeremy Cohen
a10ab99efc Merge pull request #3243 from fuchsst/fix/3241-add-missing-exposures-property
Fix/3241 add missing exposures property
2021-04-12 09:27:40 -04:00
Fuchs, Stefan
9f4398c557 added contribution to changelog 2021-04-11 18:26:29 +02:00
Fuchs, Stefan
d60f6bc89b added exposures property to manifest 2021-04-11 18:26:05 +02:00
Kyle Wigley
617eeb4ff7 test code changes without reinstalling everything 2021-04-08 12:07:55 -04:00
Kyle Wigley
5b55825638 add flaky logic to bigquery 2021-04-07 09:02:44 -04:00
Kyle Wigley
103d524db5 update changelog 2021-04-06 14:33:05 -04:00
Kyle Wigley
babd084a9b better make targets, some descriptions 2021-04-06 14:33:05 -04:00
Gerda Shank
749f87397e Merge pull request #3219 from fishtown-analytics/partial_parsing
Use Manifest instead of ParseResult [#3163]
2021-04-06 14:05:17 -04:00
Gerda Shank
307d47ebaf Use Manifest instead of ParseResults [#3163] 2021-04-06 13:51:43 -04:00
Jeremy Cohen
6acd4b91c1 Merge pull request #3227 from fishtown-analytics/update/changelog-0191
Update changelog for 0.19.1, 0.18.2
2021-04-05 16:36:28 -04:00
Jeremy Cohen
f4a9530894 Update changelog per 0.18.2 2021-04-05 15:11:08 -04:00
Jeremy Cohen
ab65385a16 Update changelog per 0.19.1 2021-04-05 15:08:33 -04:00
Jeremy Cohen
ebd761e3dc Merge pull request #3156 from max-sixty/patch-1
Update google cloud dependencies
2021-04-02 15:13:24 -04:00
Maximilian Roos
3b942ec790 Merge branch 'develop' into patch-1 2021-04-02 10:09:26 -07:00
Maximilian Roos
b373486908 Update CHANGELOG.md 2021-04-02 10:08:51 -07:00
Maximilian Roos
c8cd5502f6 Update setup.py 2021-04-02 10:05:16 -07:00
Maximilian Roos
d6dd968c4f Pin to major versions 2021-03-31 18:51:53 -07:00
Jeremy Cohen
b8d73d2197 Merge pull request #3182 from cgopalan/app-name-for-postgres
Set application_name for Postgres connections
2021-03-31 15:56:59 -04:00
Kyle Wigley
17e57f1e0b Merge pull request #3181 from fishtown-analytics/feature/data-test-materialization
Adding test materialization, implement for data tests
2021-03-30 14:21:43 -04:00
Kyle Wigley
e21bf9fbc7 code comments and explicit function call 2021-03-30 12:47:40 -04:00
Kyle Wigley
12e281f076 update changelog 2021-03-29 09:50:37 -04:00
Kyle Wigley
a5ce658755 first pass using materialization for data tests 2021-03-29 09:49:02 -04:00
Kyle Wigley
ce30dfa82d Merge pull request #3204 from fishtown-analytics/updates/tox
dev env clean up and improvements
2021-03-29 09:47:49 -04:00
Kyle Wigley
c04d1e9d5c fix circleci tests (forcing azure pipeline tests) 2021-03-29 09:19:05 -04:00
Kyle Wigley
80031d122c fix windows rpc tests 2021-03-29 09:19:05 -04:00
Kyle Wigley
943b090c90 debug windows tests 2021-03-29 09:19:04 -04:00
Kyle Wigley
39fd53d1f9 fix typos, set max-line-length (force azure pipeline tests) 2021-03-29 09:19:04 -04:00
Kyle Wigley
777e7b3b6d update changelog 2021-03-29 09:19:04 -04:00
Kyle Wigley
2783fe2a9f fix last CI step 2021-03-29 09:11:06 -04:00
Kyle Wigley
f5880cb001 CI tweaks 2021-03-29 09:11:06 -04:00
Kyle Wigley
26e501008a use new docker image for tests 2021-03-29 09:11:06 -04:00
Kyle Wigley
2c67e3f5c7 update tox, update makefile, run tests natively by default, general dev workflow cleanup 2021-03-29 09:11:06 -04:00
Kyle Wigley
033596021d Merge pull request #3148 from fishtown-analytics/dependabot/pip/plugins/snowflake/snowflake-connector-python-secure-local-storage--2.4.1
Bump snowflake-connector-python[secure-local-storage] from 2.3.6 to 2.4.1 in /plugins/snowflake
2021-03-29 09:09:49 -04:00
Kyle Wigley
f36c72e085 update changelog 2021-03-29 08:36:33 -04:00
Kyle Wigley
fefaf7b4be update snowflake deps 2021-03-26 16:04:48 -04:00
Chandrakant Gopalan
91431401ad Updated changelog 2021-03-26 16:01:47 -04:00
Chandrakant Gopalan
59d96c08a1 Add tests for application_name 2021-03-26 15:25:38 -04:00
Chandrakant Gopalan
f10447395b Fix tests 2021-03-25 21:18:42 -04:00
Chandrakant Gopalan
c2b6222798 Merge branch 'develop' of https://github.com/fishtown-analytics/dbt into app-name-for-postgres 2021-03-25 21:03:40 -04:00
Chandrakant Gopalan
3a58c49184 Default application_name to dbt 2021-03-25 21:03:08 -04:00
Jeremy Cohen
440a5e49e2 Merge pull request #3041 from yu-iskw/issue-3040
Pass the default scopes to the default BigQuery credentials
2021-03-24 17:59:32 -04:00
Jeremy Cohen
77c10713a3 Merge pull request #3100 from prratek/specify-cols-to-update
Gets columns to update from config for BQ and Snowflake
2021-03-22 17:55:49 +01:00
Jeremy Cohen
48e367ce2f Merge branch 'develop' into specify-cols-to-update 2021-03-22 13:56:11 +01:00
Jeremy Cohen
934c23bf39 Merge pull request #3145 from jmcarp/jmcarp/bigquery-job-labels
Parse query comment and use as bigquery job labels.
2021-03-22 13:30:08 +01:00
Chandrakant Gopalan
e0febcb6c3 Set application_name for Postgres connections 2021-03-20 13:24:44 -04:00
Joshua Carp
044a6c6ea4 Cleanups from code review. 2021-03-20 00:59:55 -04:00
Prratek Ramchandani
8ebbc10572 Merge branch 'develop' into specify-cols-to-update 2021-03-18 09:55:12 -04:00
Jeremy Cohen
7435828082 Merge pull request #3165 from cgopalan/dup-macro-message
Raise proper error message if duplicate macros found
2021-03-17 14:53:50 +01:00
Jeremy Cohen
369b595e8a Merge branch 'develop' into dup-macro-message 2021-03-17 12:46:38 +01:00
Jeremy Cohen
9a6d30f03d Merge pull request #3158 from techytushar/fix#3147
Feature to add _n alias to same column names #3147
2021-03-17 12:15:35 +01:00
Prratek Ramchandani
6bdd01d52b Merge branch 'develop' into specify-cols-to-update 2021-03-16 16:13:30 -04:00
prratek
bae9767498 add my name to contributors 2021-03-16 16:07:50 -04:00
prratek
b0e50dedb8 update changelog 2021-03-16 15:59:34 -04:00
Prratek Ramchandani
96bfb3b259 Update core/dbt/include/global_project/macros/materializations/common/merge.sql
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-03-16 15:54:04 -04:00
Prratek Ramchandani
909068dfa8 leave quoting for merge_update_columns to the user
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-03-16 15:53:25 -04:00
Chandrakant Gopalan
f4c74968be Add to contributor list 2021-03-16 13:53:51 -04:00
Chandrakant Gopalan
0e958f3704 Add to changelog 2021-03-16 13:51:42 -04:00
Chandrakant Gopalan
a8b2942f93 Fix duplicate macro path message 2021-03-16 13:40:18 -04:00
Tushar Mittal
564fe62400 Add unit tests for SQL process_results 2021-03-16 21:42:36 +05:30
Chandrakant Gopalan
5c5013191b Fix failing test 2021-03-14 17:46:53 -04:00
Chandrakant Gopalan
31989b85d1 Fix flake8 errors 2021-03-14 15:46:47 -04:00
Chandrakant Gopalan
5ed4af2372 Raise proper error message if duplicate macros found 2021-03-14 15:33:15 -04:00
prratek
4d18e391aa correct the load date for updated entries in "update seed" 2021-03-13 12:59:41 -05:00
prratek
2feeb5b927 Merge remote-tracking branch 'origin/specify-cols-to-update' into specify-cols-to-update 2021-03-13 12:26:22 -05:00
Prratek Ramchandani
2853f07875 use correct config var name
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-03-13 12:26:12 -05:00
prratek
4e6adc07a1 switch to correct data dir for second run 2021-03-13 12:25:02 -05:00
Jeremy Cohen
6a5ed4f418 Merge pull request #3161 from fishtown-analytics/pin/agate-1.6.2
Pin agate<1.6.2 to fix installation
2021-03-12 17:11:40 +01:00
Jeremy Cohen
ef25698d3d Pin agate>=1.6,<1.6.2 to fix installation 2021-03-12 12:17:53 +01:00
Tushar Mittal
429dcc7000 Update changelog 2021-03-11 22:10:57 +05:30
Gerda Shank
ab3f994626 Merge pull request #3157 from fishtown-analytics/track_resource_counts
Track resource counts
2021-03-11 09:42:09 -05:00
Tushar Mittal
5f8235fcfc Feature to add _n alias to same column names #3147
Signed-off-by: Tushar Mittal <chiragmittal.mittal@gmail.com>
2021-03-11 19:44:58 +05:30
Gerda Shank
db325d0fde Track resource counts 2021-03-10 23:07:02 -05:00
Maximilian Roos
8dc1f49ac7 Update google cloud dependencies 2021-03-10 19:57:57 -08:00
Joshua Carp
9fe2b651ed Merge branch 'develop' of github.com:fishtown-analytics/dbt into jmcarp/bigquery-job-labels 2021-03-09 23:32:11 -05:00
Jeremy Cohen
24e4b75c35 Merge pull request #3151 from bastienboutonnet/chore/bump_hologram_and_dataclasses
chore: allow dataclasses >= 0.6, < 0.9
2021-03-09 17:59:29 +01:00
Bastien Boutonnet
34174abf26 add changelog entry 2021-03-09 15:56:39 +01:00
Bastien Boutonnet
af778312cb relax dependency on dataclasses requirements as in hologram 2021-03-09 15:55:22 +01:00
Bastien Boutonnet
280f5614ef bump hologram to 0.0.14 2021-03-08 22:24:46 +01:00
Joshua Carp
8566a46793 Add BigQuery job labels to changelog. 2021-03-08 15:50:00 -05:00
Joshua Carp
af3c3f4cbe Add tests for bigquery label sanitize helper. 2021-03-08 15:43:53 -05:00
Bastien Boutonnet
034a44e625 fix git install link 2021-03-08 21:02:00 +01:00
Bastien Boutonnet
84155fdff7 point hologram install to my fork and up dataclasses 2021-03-08 20:42:16 +01:00
prratek
8255c913a3 change test logic for new seed directories 2021-03-06 19:12:33 -05:00
prratek
4d4d17669b refactor seeds directory structure and names 2021-03-06 19:08:23 -05:00
prratek
540a0422f5 modify seeds to contain load date and some modified records 2021-03-06 19:06:35 -05:00
prratek
de4d7d6273 Revert "modify some records and the expected result"
This reverts commit 1345d955
2021-03-06 18:39:35 -05:00
prratek
1345d95589 modify some records and the expected result 2021-03-06 18:30:50 -05:00
prratek
a5bc19dd69 paste in some data for seeds 2021-03-06 18:27:02 -05:00
prratek
25b143c8cc WIP test case and empty seeds 2021-03-06 18:17:07 -05:00
Joshua Carp
82cca959e4 Merge branch 'develop' of github.com:fishtown-analytics/dbt into jmcarp/bigquery-job-labels 2021-03-05 09:39:48 -05:00
dependabot[bot]
d52374a0b6 Bump snowflake-connector-python[secure-local-storage]
Bumps [snowflake-connector-python[secure-local-storage]](https://github.com/snowflakedb/snowflake-connector-python) from 2.3.6 to 2.4.1.
- [Release notes](https://github.com/snowflakedb/snowflake-connector-python/releases)
- [Commits](https://github.com/snowflakedb/snowflake-connector-python/compare/v2.3.6...v2.4.1)

Signed-off-by: dependabot[bot] <support@github.com>
2021-03-05 06:15:17 +00:00
Joshua Carp
c71a18ca07 Hyphenate query comment fields and fix deserialization bug. 2021-03-05 00:09:17 -05:00
Joshua Carp
8d73ae2cc0 Address comments from code review. 2021-03-04 10:20:15 -05:00
Joshua Carp
7b0c74ca3e Fix lint. 2021-03-04 00:34:46 -05:00
Joshua Carp
62be9f9064 Sanitize bigquery labels. 2021-03-04 00:14:50 -05:00
Joshua Carp
2fdc113d93 Parse query comment and use as bigquery job labels. 2021-03-04 00:06:59 -05:00
Gerda Shank
b70fb543f5 Merge pull request #3138 from fishtown-analytics/mashumaro-cleanup
Use updated Mashumaro code
2021-03-03 15:46:56 -05:00
Gerda Shank
31c88f9f5a Use updated Mashumaro code 2021-03-03 13:39:51 -05:00
Prratek Ramchandani
af3a818f12 loop over column_name instead of column.name
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-03-03 09:27:05 -05:00
prratek
a07532d4c7 revert changes to incremental materializations 2021-03-02 22:14:58 -05:00
prratek
fb449ca4bc rename new config var to merge_update_columns 2021-03-02 22:11:12 -05:00
prratek
4da65643c0 use merge_update_columns when getting merge sql 2021-03-02 22:10:09 -05:00
Prratek Ramchandani
bf64db474c fix typo
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-03-02 21:59:21 -05:00
Kyle Wigley
344a14416d Merge pull request #3065 from fishtown-analytics/feature/write-artifact-schema
Collect and write json schema for dbt artifacts
2021-02-22 12:47:35 -05:00
Jeremy Cohen
be47a0c5db Merge pull request #3117 from fishtown-analytics/fix/deps-git-warn-if-main-master
git package: warn if specified revision master, main
2021-02-22 10:49:31 +01:00
prratek
808b980301 move "update cols" incremental test to snowflake models 2021-02-20 14:40:55 -05:00
prratek
3528480562 test incremental model w/ subset of cols to update 2021-02-19 23:17:00 -05:00
prratek
6bd263d23f add incremental_update_columns to Snowflake & BQ config schemas 2021-02-19 21:17:39 -05:00
prratek
2b9aa3864b rename config field to incremental_update_columns 2021-02-19 21:13:05 -05:00
Prratek Ramchandani
81155caf88 use get_columns_in_relation as default for snowflake
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-02-19 21:10:34 -05:00
Jeremy Cohen
c7c057483d sry flake8 2021-02-19 15:19:03 +01:00
Jeremy Cohen
7f5170ae4d Warn if main or master is specified, too 2021-02-19 11:19:36 +01:00
Jeremy Cohen
49b8693b11 Merge pull request #3104 from VasiliiSurov/develop
3057 Moving from 'master' to 'HEAD' default branch in git
2021-02-18 20:29:34 +01:00
Jeremy Cohen
d7b0a14eb5 Merge branch 'develop' into develop 2021-02-18 19:55:54 +01:00
Jeremy Cohen
8996cb1e18 Merge pull request #2976 from pcasteran/fix/2940-bq-incremental-var-declaration
Fix `_dbt_max_partition` declaration and initialization for BigQuery incremental models
2021-02-18 18:54:16 +01:00
Jeremy Cohen
38f278cce0 Merge pull request #3111 from fishtown-analytics/feature/architecture-md
Add ARCHITECTURE.md
2021-02-18 14:55:05 +01:00
Jeremy Cohen
bb4e475044 Add ARCHITECTURE.md [skip ci] 2021-02-17 14:58:43 +01:00
Pascal Casteran
4fbe36a8e9 Moved changelog entry up to v0.20.0 2021-02-16 19:12:22 +01:00
Pascal Casteran
a1a40b562a Updated CHANGELOG.md 2021-02-16 19:09:22 +01:00
Pascal Casteran
3a4a1bb005 Fix _dbt_max_partition declaration and initialization for BigQuery incremental models 2021-02-16 19:09:22 +01:00
Prratek Ramchandani
4f8c10c1aa default to get_columns_in_relation if not specified in config
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-02-15 20:58:39 -05:00
VS
4833348769 Merge remote-tracking branch 'origin/develop' into develop 2021-02-15 17:06:43 -05:00
VS
ad07d59a78 3057 Changelog.md 2021-02-15 17:06:36 -05:00
VasiliiSurov
e8aaabd1d3 Merge pull request #1 from VasiliiSurov/3057_dbt_deps
3057 Replacing 'master' to 'HEAD' for default git commit
2021-02-15 17:02:13 -05:00
Kyle Wigley
d7d7396eeb update dependabot config, turn off rebasing 2021-02-15 14:42:44 -05:00
Kyle Wigley
41538860cd Revert "Merge branch '0.19.latest' into develop"
This reverts commit 5c9f8a0cf0, reversing
changes made to 36d1bddc5b.
2021-02-15 13:26:18 -05:00
Kyle Wigley
5c9f8a0cf0 Merge branch '0.19.latest' into develop 2021-02-15 12:11:52 -05:00
Github Build Bot
11c997c3e9 Merge remote-tracking branch 'origin/releases/0.19.1b2' into 0.19.latest 2021-02-15 17:04:12 +00:00
Github Build Bot
1b1184a5e1 Release dbt v0.19.1b2 2021-02-15 17:00:50 +00:00
Kyle Wigley
4ffcc43ed9 Merge pull request #3101 from fishtown-analytics/vendor-mashumaro
Vendor mashumaro code with dbt specific modifications
2021-02-15 10:36:10 -05:00
Kyle Wigley
4ccaac46a6 Update core/mashumaro/types.py
Co-authored-by: Jeremy Cohen <jeremy@fishtownanalytics.com>
2021-02-15 10:02:50 -05:00
Kyle Wigley
ba88b84055 Update core/mashumaro/serializer/base/dict.py
Co-authored-by: Jeremy Cohen <jeremy@fishtownanalytics.com>
2021-02-15 10:02:45 -05:00
prratek
9086634c8f get columns to update from config for BQ and Snowflake 2021-02-13 11:09:25 -05:00
VS
e88f1f1edb 3057 Replacing 'master' to 'HEAD' for default git commit 2021-02-12 22:34:49 -05:00
Kyle Wigley
13c7486f0e add license info 2021-02-12 17:32:15 -05:00
Kyle Wigley
8e811ba141 vendor mashumaro code in dbt, include as another module in dbt-core 2021-02-12 17:19:28 -05:00
Github Build Bot
c5d86afed6 Merge remote-tracking branch 'origin/releases/0.19.1b1' into 0.19.latest 2021-02-12 16:10:31 +00:00
Github Build Bot
43a0cfbee1 Release dbt v0.19.1b1 2021-02-12 16:06:52 +00:00
Jeremy Cohen
8567d5f302 Fix int64, ts partitions. Rework tests 2021-02-12 15:58:47 +01:00
Jeremy Cohen
36d1bddc5b Merge pull request #3098 from fishtown-analytics/fix/bq-insert-overwrite-int-ts-partitions
[BQ] Fix insert_overwrite with int + ts partitions
2021-02-12 15:56:59 +01:00
Kyle Wigley
bf992680af Merge pull request #3090 from fishtown-analytics/fix/dbt-mashumaro-dep
Move dbt-mashumaro dep to setup.py to prep for beta release
2021-02-12 09:32:31 -05:00
Kyle Wigley
e064298dfc move dbt-mashumaro dep to setup.py to prep for beta release 2021-02-12 08:51:14 -05:00
Jeremy Cohen
e01a10ced5 Fix int64, ts partitions. Rework tests 2021-02-12 14:03:03 +01:00
Gerda Shank
2aa10fb1ed Use version 0.0.13 of Hologram 2021-02-11 11:48:58 -05:00
Gerda Shank
66f442ad76 Merge pull request #3071 from fishtown-analytics/hologram-version-0.0.13
Use version 0.0.13 of Hologram
2021-02-11 11:46:57 -05:00
Kyle Wigley
11f1ecebcf update changelog 2021-02-11 11:21:40 -05:00
Kyle Wigley
e339cb27f6 first pass at writing out artifact schema to destination 2021-02-11 11:20:38 -05:00
Kyle Wigley
bce3232b39 Merge pull request #3062 from fishtown-analytics/feature/dependabot-config
Add dependabot config
2021-02-11 10:22:13 -05:00
Kyle Wigley
b08970ce39 Merge pull request #3069 from fishtown-analytics/fix/update-docs
Update docs with permalinks and new git branching strategy
2021-02-11 10:17:57 -05:00
Gerda Shank
533f88ceaf Use version 0.0.13 of Hologram 2021-02-10 14:08:27 -05:00
Jeremy Cohen
c8f0469a44 Merge pull request #3036 from ran-eh/patch-2
Fix incorrect error message (trivial)
2021-02-10 12:00:43 +01:00
Kyle Wigley
a1fc24e532 update docs with permalinks and new git branching 2021-02-10 00:31:40 -05:00
Ran Ever-Hadani
d80daa48df Fix flake8 error
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-02-09 14:07:50 -08:00
Kyle Wigley
92aae2803f update changelog 2021-02-09 09:53:32 -05:00
Gerda Shank
77cbbbfaf2 Performance fixes, including supporting libyaml, caching
mapped_fields in the classes for 'from_dict', removing deepcopy
on fqn_search, separating validation from 'from_dict',
and special handling for dbt internal not_null and unique tests.
Use TestMacroNamespace instead of original in order to limit
the number of macros in the context.  Integrate mashumaro into
dbt to improve performance of 'from_dict' and 'to_dict'
2021-02-08 14:06:14 -05:00
Gerda Shank
6c6649f912 Performance fixes, including supporting libyaml, caching
mapped_fields in the classes for 'from_dict', removing deepcopy
on fqn_search, separating validation from 'from_dict',
and special handling for dbt internal not_null and unique tests.
Use TestMacroNamespace instead of original in order to limit
the number of macros in the context.  Integrate mashumaro into
dbt to improve performance of 'from_dict' and 'to_dict'
2021-02-05 15:23:55 -05:00
Yu ISHIKAWA
55fbaabfda Pass the default scopes to the default BigQuery credentials 2021-01-29 18:09:05 +09:00
Ran Ever-Hadani
56c2518936 Correct message to be more general
as per https://github.com/fishtown-analytics/dbt/pull/3036#pullrequestreview-578328868
2021-01-28 11:22:14 -08:00
Kyle Wigley
2b48152da6 Merge branch 'dev/0.19.1' into dev/margaret-mead 2021-01-27 17:16:13 -05:00
Christophe Blefari
e743e23d6b Update CHANGELOG to release fix in dbt 0.19.1 version 2021-01-27 16:57:29 -05:00
Christophe Blefari
f846f921f2 Bump werkzeug upper bound dependency constraint to include version 1.0 2021-01-27 16:55:56 -05:00
Ran Ever-Hadani
e52a599be6 Add fix 2021-01-27 13:52:01 -08:00
Ran Ever-Hadani
99744bd318 Fix incorrect error message (trivial) 2021-01-27 13:09:32 -08:00
Github Build Bot
1060035838 Merge remote-tracking branch 'origin/releases/0.19.0' into dev/kiyoshi-kuromiya 2021-01-27 18:02:37 +00:00
Github Build Bot
69cc20013e Release dbt v0.19.0 2021-01-27 17:39:48 +00:00
Github Build Bot
3572bfd37d Merge remote-tracking branch 'origin/releases/0.19.0rc3' into dev/kiyoshi-kuromiya 2021-01-27 16:42:46 +00:00
Github Build Bot
a6b82990f5 Release dbt v0.19.0rc3 2021-01-27 16:07:41 +00:00
Kyle Wigley
540c1fd9c6 Merge pull request #3019 from fishtown-analytics/fix/cleanup-dockerfile
Clean up docker resources
2021-01-25 10:19:45 -05:00
Jeremy Cohen
46d36cd412 Merge pull request #3028 from NiallRees/lowercase_cte_names
Make generated CTE test names lowercase to match style guide
2021-01-25 14:39:26 +01:00
NiallRees
a170764fc5 Add to contributors 2021-01-25 11:16:00 +00:00
NiallRees
f72873a1ce Update CHANGELOG.md
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-01-25 11:13:32 +00:00
NiallRees
82496c30b1 Changelog 2021-01-24 16:35:40 +00:00
NiallRees
cb3c007acd Make generated CTE test names lowercase to match style guide 2021-01-24 16:19:20 +00:00
Jeremy Cohen
cb460a797c Merge pull request #3018 from lynxcare/fix-issue-debug-exit-code
dbt debug should return 1 when one of the tests fail
2021-01-21 16:36:03 +01:00
Kyle Wigley
1b666d01cf add a dependabot config, hopefully this ignores docker/requirements/*.txt files 2021-01-21 09:51:35 -05:00
Sam Debruyn
df24c7d2f8 Merge branch 'dev/margaret-mead' into fix-issue-debug-exit-code 2021-01-21 15:39:18 +01:00
Sam Debruyn
133c15c0e2 move in changelog to v0.20 2021-01-21 15:38:31 +01:00
Kyle Wigley
116e18a19e rename testing dockerfile 2021-01-21 09:28:17 -05:00
Sam Debruyn
ec0af7c97b remove exitcodes and sys.exit 2021-01-21 10:36:05 +01:00
Jeremy Cohen
a34a877737 Merge pull request #2974 from rvacaru/fix-bug-2731
Fix bug #2731 on stripping query comments for snowflake
2021-01-21 09:54:22 +01:00
Sam Debruyn
f018794465 fix flake test - formatting 2021-01-20 21:09:58 +01:00
Sam Debruyn
d45f5e9791 add missing conditions 2021-01-20 18:15:32 +01:00
Razvan Vacaru
04bd0d834c added extra unit test 2021-01-20 18:06:17 +01:00
Sam Debruyn
ed4f0c4713 formatting 2021-01-20 18:04:21 +01:00
Sam Debruyn
c747068d4a use sys.exit 2021-01-20 16:51:06 +01:00
Kyle Wigley
aa0fbdc993 update changelog 2021-01-20 10:33:18 -05:00
Kyle Wigley
b50bfa7277 - rm older dockerfiles
- add dockerfile from dbt-releases
- rename the development dockerfile to Dockerfile.dev to avoid confusion
2021-01-20 10:23:03 -05:00
Sam Debruyn
e91988f679 use ExitCodes enum for exit code 2021-01-20 16:09:41 +01:00
Sam Debruyn
3ed1fce3fb update changelog 2021-01-20 16:06:24 +01:00
Sam Debruyn
e3ea0b511a dbt debug should return 1 when one of the tests fail 2021-01-20 16:00:58 +01:00
Razvan Vacaru
c411c663de moved unit tests and updated changelog.md 2021-01-19 19:04:58 +01:00
Razvan Vacaru
1c6f66fc14 Merge branch 'dev/margaret-mead' of https://github.com/fishtown-analytics/dbt into fix-bug-2731 2021-01-19 19:01:01 +01:00
Jeremy Cohen
1f927a374c Merge pull request #2928 from yu-iskw/issue-1843
Support require_partition_filter and partition_expiration_days in BQ
2021-01-19 12:11:39 +01:00
Jeremy Cohen
07c4225aa8 Merge branch 'dev/margaret-mead' into issue-1843 2021-01-19 11:24:59 +01:00
Github Build Bot
42a85ac39f Merge remote-tracking branch 'origin/releases/0.19.0rc2' into dev/kiyoshi-kuromiya 2021-01-14 17:41:49 +00:00
Github Build Bot
16e6d31ee3 Release dbt v0.19.0rc2 2021-01-14 17:21:25 +00:00
Kyle Wigley
a6db5b436d Merge pull request #2996 from fishtown-analytics/fix/rm-ellipses
Remove ellipses printed while parsing
2021-01-14 10:39:16 -05:00
Kyle Wigley
47675f2e28 update changelog 2021-01-14 09:28:28 -05:00
Kyle Wigley
0642bbefa7 remove ellipses printed while parsing 2021-01-14 09:28:05 -05:00
Kyle Wigley
43da603d52 Merge pull request #3009 from fishtown-analytics/fix/exposure-parsing
Fix exposure parsing to allow other resources with the same name
2021-01-14 09:26:02 -05:00
Kyle Wigley
f9e1f4d111 update changelog 2021-01-13 11:54:20 -05:00
Jeremy Cohen
1508564e10 Merge pull request #3008 from fishtown-analytics/feat/print-exposure-stats-too
Add exposures to print_compile_stats
2021-01-13 15:58:13 +01:00
Kyle Wigley
c14e6f4dcc add test for dupe exposures and dupe model/exposure name 2021-01-13 08:55:22 -05:00
Jeremy Cohen
75b6a20134 Add exposures to Found list 2021-01-12 19:07:52 +01:00
Kyle Wigley
d82a07c221 tweak exposure parsing logic 2021-01-12 12:41:51 -05:00
Jeremy Cohen
c6f7dbcaa5 Merge pull request #3006 from stpierre/postgres-unpin-botocore
postgres: Don't pin botocore version
2021-01-12 13:59:55 +01:00
Chris St. Pierre
82cd099e48 Update CHANGELOG 2021-01-12 06:20:09 -06:00
Chris St. Pierre
546c011dd8 postgres: Don't pin botocore version
`snowflake-connector-python` doesn't pin it, and it restricts us to a
much older version of boto3 than the boto3 pin would otherwise allow
(specifically, botocore<1.15 requires boto3<1.12).
2021-01-11 17:25:03 -06:00
Jeremy Cohen
10b33ccaf6 Merge pull request #3004 from mikaelene/Snapshot_merge_WHEN_MATCHED
This change makes the macro easier to read and workable on SQL Server
2021-01-11 16:42:09 +01:00
mikaelene
bc01572176 Same as #3003, but for postgres 2021-01-11 16:04:38 +01:00
mikaelene
ccd2064722 This change makes the macro easier to read and makes the code work for SQL Server without a custom adapter macro. Solved #3003 2021-01-11 15:04:23 +01:00
mikaelene
0fb42901dd This change makes the macro easier to read and makes the code work for SQL Server without a custom adapter macro. Solved #3003 2021-01-11 14:58:07 +01:00
Jeremy Cohen
a4280d7457 Merge pull request #3000 from swanderz/tsql_not_equal_workaround
Tsql not equal workaround
2021-01-11 09:40:33 +01:00
Anders
6966ede68b Update CHANGELOG.md
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-01-10 20:54:37 -08:00
Anders
27dd14a5a2 Update core/dbt/include/global_project/macros/materializations/snapshot/strategies.sql
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-01-10 20:54:10 -08:00
Anders
2494301f1e Update CHANGELOG.md
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2021-01-10 20:53:52 -08:00
Anders Swanson
f13143accb for posterity 2021-01-08 13:23:13 -08:00
Anders Swanson
26d340a917 temp hack 2021-01-08 12:14:08 -08:00
Anders Swanson
cc75cd4102 no tsql support for condA != condB 2021-01-08 12:10:15 -08:00
Anders Swanson
cf8615b231 Merge branch 'dev/kiyoshi-kuromiya' of https://github.com/fishtown-analytics/dbt into dev/kiyoshi-kuromiya 2021-01-08 12:03:15 -08:00
Jeremy Cohen
30f473a2b1 Merge pull request #2994 from fishtown-analytics/copyedit-changelog
Light cleanup of v0.19.0 changelogs
2021-01-07 16:00:02 +01:00
Jeremy Cohen
4618709baa Lightly edit v0.19 changelogs 2021-01-07 10:13:43 +01:00
Razvan Vacaru
16b098ea42 updated CHANGELOG.md 2021-01-04 17:43:03 +01:00
Razvan Vacaru
b31c4d407a Fix #2731 stripping snowflake comments in multiline queries 2021-01-04 17:41:00 +01:00
Kyle Wigley
28c36cc5e2 Merge pull request #2988 from fishtown-analytics/fix/dockerfile
Manually fix requirements for dockerfile using new pip version
2021-01-04 09:10:05 -05:00
Kyle Wigley
6bfbcb842e manually fix dockerfile using new pip version 2020-12-31 13:53:50 -05:00
Github Build Bot
a0eade4fdd Merge remote-tracking branch 'origin/releases/0.19.0rc1' into dev/kiyoshi-kuromiya 2020-12-29 23:07:35 +00:00
Github Build Bot
ee24b7e88a Release dbt v0.19.0rc1 2020-12-29 22:52:34 +00:00
Anders Swanson
c9baddf9a4 Merge branch 'master' of https://github.com/fishtown-analytics/dbt into dev/kiyoshi-kuromiya 2020-12-22 23:11:09 -08:00
Kyle Wigley
c5c780a685 Merge pull request #2972 from fishtown-analytics/feature/update-dbt-docs
dbt-docs changes for v0.19.0-rc1
2020-12-22 14:07:20 -05:00
Kyle Wigley
421aaabf62 Merge pull request #2961 from fishtown-analytics/feature/add-adapter-query-stats
Include adapter response info in execution results
2020-12-22 13:57:07 -05:00
Kyle Wigley
86788f034f update changelog 2020-12-22 13:30:50 -05:00
Kyle Wigley
232d3758cf update dbt docs 2020-12-22 13:17:51 -05:00
Kyle Wigley
71bcf9b31d update changelog 2020-12-22 13:08:12 -05:00
Kyle Wigley
bf4ee4f064 update api, fix tests, add placeholder for test/source results 2020-12-22 12:13:37 -05:00
Kyle Wigley
aa3bdfeb17 update naming 2020-12-21 13:35:15 -05:00
Jeremy Cohen
ce6967d396 Merge pull request #2966 from fishtown-analytics/fix/add-ctes-comment
Update comments for _add_ctes()
2020-12-18 10:54:37 -05:00
Yu ISHIKAWA
330065f5e0 Add a condition for require_partition_filter 2020-12-18 11:14:03 +09:00
Yu ISHIKAWA
944db82553 Remove unnecessary code for print debug 2020-12-18 11:14:03 +09:00
Yu ISHIKAWA
c257361f05 Fix syntax 2020-12-18 11:14:03 +09:00
Yu ISHIKAWA
ffdbfb018a Implement tests in test_bigquery_changing_partitions.py 2020-12-18 11:14:01 +09:00
Yu ISHIKAWA
cfa2bd6b08 Remove tests from test_bigquery_adapter_specific.py 2020-12-18 11:13:16 +09:00
Yu ISHIKAWA
51e90c3ce0 Format 2020-12-18 11:13:16 +09:00
Yu ISHIKAWA
d69149f43e Update 2020-12-18 11:13:15 +09:00
Yu ISHIKAWA
f261663f3d Add debug code 2020-12-18 11:13:15 +09:00
Yu ISHIKAWA
e5948dd1d3 Update 2020-12-18 11:13:15 +09:00
Yu ISHIKAWA
5f13aab7d8 Print debug 2020-12-18 11:13:15 +09:00
Yu ISHIKAWA
292d489592 Format code 2020-12-18 11:13:15 +09:00
Yu ISHIKAWA
0a01f20e35 Update CHANGELOG.md 2020-12-18 11:13:11 +09:00
Yu ISHIKAWA
2bd08d5c4c Support require_partition_filter and partition_expiration_days in BQ 2020-12-18 11:12:47 +09:00
Jeremy Cohen
adae5126db Merge pull request #2954 from fishtown-analytics/feature/defer-tests
Feature: defer tests
2020-12-17 18:01:14 -05:00
Kyle Wigley
dddf1bcb76 first pass at adding query stats, naming tbd 2020-12-17 16:39:02 -05:00
Jeremy Cohen
d23d4b0fd4 Merge pull request #2963 from tyang209/issue-2931
Bumped boto3 version upper range for dbt-redshift
2020-12-17 14:30:47 -05:00
Tao Yang
658f7550b3 Merge branch 'dev/kiyoshi-kuromiya' into issue-2931 2020-12-17 08:58:49 -08:00
Kyle Wigley
cfb50ae21e Merge pull request #2960 from fishtown-analytics/feature/python-39
Test python3.9
2020-12-17 11:11:56 -05:00
Jeremy Cohen
9b0a365822 Update comments for _add_ctes() 2020-12-17 10:35:04 -05:00
Jeremy Cohen
97ab130619 Merge pull request #2958 from fishtown-analytics/fix/keyerror-defer-missing-parent
Fix KeyError from defer + deletion
2020-12-17 10:29:51 -05:00
Tao Yang
3578fde290 Bumped boto3 version upper range for dbt-redshift 2020-12-16 20:03:53 -08:00
Jeremy Cohen
f382da69b8 Changelog 2020-12-16 17:46:00 -05:00
Jeremy Cohen
2da3d215c6 Add test case to repro bug 2020-12-16 17:38:27 -05:00
Kyle Wigley
43ed29c14c update changelog 2020-12-16 16:29:48 -05:00
Jeremy Cohen
9df0283689 Truthier? 2020-12-16 14:55:27 -05:00
Jeremy Cohen
04b82cf4a5 What is backward may not be forward 2020-12-16 14:55:27 -05:00
Jeremy Cohen
274c3012b0 Add defer to rpc test method 2020-12-16 14:53:25 -05:00
Jeremy Cohen
2b24a4934f defer tests, too 2020-12-16 14:42:00 -05:00
Kyle Wigley
692a423072 comment out snowflake py39 tests 2020-12-16 11:27:00 -05:00
Kyle Wigley
148f55335f address issue with py39 2020-12-16 11:25:31 -05:00
Kyle Wigley
2f752842a1 update hologram and add new envs to tox 2020-12-16 11:25:31 -05:00
Jeremy Cohen
aff72996a1 Merge pull request #2946 from fishtown-analytics/fix/defer-if-not-exist
Defer iff unselected reference does not exist in current env
2020-12-16 11:22:31 -05:00
Jeremy Cohen
08e425bcf6 Handle keyerror if old node missing 2020-12-16 00:24:00 -05:00
Kyle Wigley
454ddc601a Merge pull request #2943 from fishtown-analytics/feature/refactor-run-results
Clean up run results
2020-12-15 12:42:22 -05:00
Jeremy Cohen
b025f208a8 Check if relation exists before deferring 2020-12-14 22:21:43 -05:00
Kyle Wigley
b60e533b9d fix printer output 2020-12-14 19:50:17 -05:00
Kyle Wigley
37af0e0d59 update changelog 2020-12-14 16:28:23 -05:00
Kyle Wigley
ac1de5bce9 more updates 2020-12-14 16:28:23 -05:00
Kyle Wigley
ef7ff55e07 flake8 2020-12-14 16:28:23 -05:00
Kyle Wigley
608db5b982 code cleanup + swap node with unique_id 2020-12-14 16:28:23 -05:00
Kyle Wigley
8dd69efd48 address test failures 2020-12-14 16:28:23 -05:00
Kyle Wigley
73f7fba793 fix printing test status 2020-12-14 16:28:23 -05:00
Kyle Wigley
867e2402d2 chugging along 2020-12-14 16:28:23 -05:00
Kyle Wigley
a3b9e61967 first pass, lots of TODO's [skip ci] 2020-12-14 16:28:22 -05:00
Jeremy Cohen
cd149b68e8 Merge pull request #2920 from joellabes/2913-docs-block-exposures
Render docs blocks in exposures
2020-12-13 18:38:23 -05:00
Joel Labes
cd3583c736 Merge branch 'dev/kiyoshi-kuromiya' into 2913-docs-block-exposures 2020-12-13 14:27:37 +13:00
Joel Labes
441f86f3ed Add test.notebook_info to expected manifest 2020-12-13 14:25:37 +13:00
Joel Labes
f62bea65a1 Move model.test.view_summary to parent map instead of child map 2020-12-13 14:11:04 +13:00
Jeremy Cohen
886b574987 Merge pull request #2939 from fishtown-analytics/fix/big-seed-smaller-path
Use diff file path for big seed checksum
2020-12-07 11:18:15 -05:00
Joel Labes
2888bac275 Merge branch 'dev/kiyoshi-kuromiya' into 2913-docs-block-exposures 2020-12-07 21:17:21 +13:00
Joel Labes
35c9206916 Fix test failure (?) 2020-12-07 21:15:44 +13:00
Joel Labes
c4c5b59312 Stab at updating parent and child maps 2020-12-07 17:45:12 +13:00
Jeremy Cohen
f25fb4e5ac Use diff file path for big seed checksum 2020-12-04 17:04:27 -05:00
Jeremy Cohen
868bfec5e6 Merge pull request #2907 from max-sixty/raise
Remove duplicate raise
2020-12-03 14:17:58 -05:00
Jeremy Cohen
e7c242213a Merge pull request #2908 from max-sixty/bq-default-project
Allow BigQuery to default on project name
2020-12-03 14:17:02 -05:00
Jeremy Cohen
862552ead4 Merge pull request #2930 from fishtown-analytics/revert-2858-dependabot/pip/docker/requirements/cryptography-3.2
Revert dependabot cryptography upgrade for old versions
2020-12-03 13:58:26 -05:00
Jeremy Cohen
9d90e0c167 tiny changelog fixup 2020-12-03 13:27:46 -05:00
Jeremy Cohen
a281f227cd Revert "Bump cryptography from 2.9.2 to 3.2 in /docker/requirements" 2020-12-03 12:12:15 -05:00
Maximilian Roos
5b981278db changelog 2020-12-02 14:59:35 -08:00
Maximilian Roos
c1091ed3d1 Merge branch 'dev/kiyoshi-kuromiya' into bq-default-project 2020-12-02 14:55:27 -08:00
Maximilian Roos
08aed63455 Formatting 2020-12-02 11:19:02 -08:00
Maximilian Roos
90a550ee4f Update plugins/bigquery/dbt/adapters/bigquery/connections.py
Co-authored-by: Kyle Wigley <kwigley44@gmail.com>
2020-12-02 10:41:20 -08:00
Jeremy Cohen
34869fc2a2 Merge pull request #2922 from plotneishestvo/snowflake_connector_upgrade
update cryptography package and snowflake connector
2020-12-02 12:34:34 -05:00
Pavel Plotnikov
3deb10156d Merge branch 'dev/kiyoshi-kuromiya' into snowflake_connector_upgrade 2020-12-02 12:46:02 +02:00
Maximilian Roos
8c0e84de05 Move method to module func 2020-12-01 16:19:20 -08:00
Joel Labes
23be083c39 Change models folder to ref_models folder 2020-12-02 11:59:21 +13:00
Joel Labes
217aafce39 Add line break to description, fix refs and maybe fix original_file_path 2020-12-02 11:47:29 +13:00
Joel Labes
03210c63f4 Blank instead of none description 2020-12-02 10:57:47 +13:00
Joel Labes
a90510f6f2 Ref a model that actually exists 2020-12-02 10:40:34 +13:00
Joel Labes
36d91aded6 Empty description for minimal/basic exposure object tests 2020-12-01 17:56:55 +13:00
Joel Labes
9afe8a1297 Default to empty string for ParsedExposure description 2020-12-01 17:35:42 +13:00
Maximilian Roos
1e6f272034 Add test config 2020-11-30 20:06:06 -08:00
Maximilian Roos
a1aa2f81ef _ 2020-11-30 19:30:07 -08:00
Maximilian Roos
62899ef308 _ 2020-11-30 16:54:21 -08:00
Joel Labes
7f3396c002 Forgot another comma 🤦 2020-12-01 12:46:26 +13:00
Joel Labes
453bc18196 Merge branch '2913-docs-block-exposures' of https://github.com/joellabes/dbt into 2913-docs-block-exposures 2020-12-01 12:42:11 +13:00
Joel Labes
dbb6b57b76 Forgot a comma 2020-12-01 12:40:51 +13:00
Joel Labes
d7137db78c Merge branch 'dev/kiyoshi-kuromiya' into 2913-docs-block-exposures 2020-12-01 12:34:29 +13:00
Joel Labes
5ac4f2d80b Move description arg to be below default-free args 2020-12-01 12:33:08 +13:00
Jeremy Cohen
5ba5271da9 Merge pull request #2903 from db-magnus/bq-hourly-part
Hourly, monthly and yearly partitions in BigQuery
2020-11-30 09:46:36 -05:00
Pavel Plotnikov
b834e3015a update changelog md 2020-11-30 14:46:51 +02:00
Joel Labes
c8721ded62 Code review: non-optional description, docs block tests, yaml exposure attributes 2020-11-30 20:29:47 +13:00
Magnus Fagertun
1e97372d24 Update test/unit/test_bigquery_adapter.py
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2020-11-30 07:26:36 +01:00
Magnus Fagertun
fd4e111784 Update test/unit/test_bigquery_adapter.py
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2020-11-30 00:44:25 +01:00
Magnus Fagertun
75094e7e21 Update test/unit/test_bigquery_adapter.py
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2020-11-30 00:44:15 +01:00
Joel Labes
8db2d674ed Update CHANGELOG.md 2020-11-28 15:08:13 +13:00
Pavel Plotnikov
ffb140fab3 update cryptography package and snowflake connector 2020-11-27 16:52:13 +02:00
Joel Labes
e93543983c Follow Jeremy's wild speculation 2020-11-27 22:45:31 +13:00
Magnus Fagertun
0d066f80ff added test and enhancements from jtcohen6 2020-11-25 21:41:51 +01:00
Magnus Fagertun
ccca1b2016 Update plugins/bigquery/dbt/adapters/bigquery/impl.py
Co-authored-by: Jeremy Cohen <jtcohen6@gmail.com>
2020-11-25 21:17:07 +01:00
Kyle Wigley
fec0e31a25 Merge pull request #2902 from fishtown-analytics/fix/test-selection
set default `materialized` for test node configs
2020-11-24 12:19:40 -05:00
Kyle Wigley
d246aa8f6d update readme 2020-11-24 10:40:01 -05:00
Maximilian Roos
66bfba2258 flake8 seems to sometimes be applied 2020-11-23 17:39:57 -08:00
Maximilian Roos
b53b4373cb Define database exclusively in contracts/connection.py 2020-11-23 17:32:41 -08:00
Maximilian Roos
0810f93883 Allow BigQuery to default on project name 2020-11-23 16:58:54 -08:00
Maximilian Roos
a4e696a252 Remove duplicate raise 2020-11-23 15:34:43 -08:00
Jeremy Cohen
0951d08f52 Merge pull request #2877 from max-sixty/unlock-google-api
Wider google-cloud dependencies
2020-11-23 14:16:12 -05:00
Jeremy Cohen
dbf367e070 Merge branch 'dev/kiyoshi-kuromiya' into unlock-google-api 2020-11-23 11:46:07 -05:00
Magnus Fagertun
6447ba8ec8 whitespace cleanup 2020-11-22 10:00:10 +01:00
Magnus Fagertun
43e260966f uppercase and lowercase for date partitions supported 2020-11-21 01:21:07 +01:00
Magnus Fagertun
b0e301b046 typo in _partitions_match 2020-11-21 00:40:27 +01:00
Magnus Fagertun
c8a9ea4979 added month,year to date partitioning, granularity comparison to _partitions_match 2020-11-21 00:24:20 +01:00
Maximilian Roos
afb7fc05da Changelog 2020-11-20 14:58:46 -08:00
Magnus Fagertun
14124ccca8 added tests for datetime and timestamp 2020-11-20 00:10:15 +01:00
Magnus Fagertun
df5022dbc3 moving granularity to render, not to break tests 2020-11-19 18:51:05 +01:00
Magnus Fagertun
015e798a31 more BQ partitioning 2020-11-19 17:42:27 +01:00
Kyle Wigley
c19125bb02 Merge pull request #2893 from fishtown-analytics/feature/track-parse-time
Add event tracking for project parse/load time
2020-11-19 10:30:46 -05:00
Kyle Wigley
0e6ac5baf1 can we just default materialization to test? 2020-11-19 09:27:31 -05:00
Magnus Fagertun
2c8d1b5b8c Added hour, year, month partitioning BQ 2020-11-19 13:47:42 +01:00
Kyle Wigley
f7c0c1c21a fix tests 2020-11-18 17:21:41 -05:00
Kyle Wigley
4edd98f7ce update changelog 2020-11-18 16:19:58 -05:00
Kyle Wigley
df0abb7000 flake8 fixes 2020-11-18 16:19:58 -05:00
Kyle Wigley
4f93da307f add event to track loading time 2020-11-18 16:19:58 -05:00
Gerda Shank
a8765d54aa Merge pull request #2895 from fishtown-analytics/string_selectors
convert cli-style strings in selectors to normalized dictionaries
2020-11-18 15:53:23 -05:00
Jeremy Cohen
ec0f3d22e7 Merge pull request #2892 from rsella/dev/kiyoshi-kuromiya
Change dbt list command to always return 0 as exit code
2020-11-17 11:12:55 -05:00
Riccardo Sella
009b75cab6 Fix changelog and edit additional failing tests 2020-11-17 16:38:28 +01:00
Riccardo Sella
d64668df1e Change dbt list command to always return 0 as exit code 2020-11-17 14:49:24 +01:00
Maximilian Roos
8538bec99e _ 2020-11-11 13:48:41 -08:00
Maximilian Roos
f983900597 google-cloud-bigquery goes to 3 2020-11-10 23:51:15 -08:00
Maximilian Roos
8c71488757 _ 2020-11-10 08:38:43 -08:00
Maximilian Roos
7aa8c435c9 Bump protobuf too 2020-11-09 17:36:41 -08:00
Maximilian Roos
daeb51253d Unpin google-cloud dependencies 2020-11-09 17:18:42 -08:00
Drew Banin
1dd4187cd0 Merge branch '0.14.latest' 2019-09-05 14:32:23 -04:00
Connor McArthur
9e36ebdaab Merge branch '0.13.latest' of github.com:fishtown-analytics/dbt 2019-03-21 13:27:24 -04:00
Drew Banin
aaa0127354 Merge pull request #1241 from fishtown-analytics/0.12.latest
Merge 0.12.latest into master
2019-01-15 17:01:16 -05:00
Drew Banin
e60280c4d6 Merge branch '0.12.latest' 2018-11-15 12:24:05 -05:00
Drew Banin
aef7866e29 Update CHANGELOG.md 2018-11-13 10:36:35 -05:00
Drew Banin
70694e3bb9 Merge pull request #1118 from fishtown-analytics/0.12.latest
merge 0.12.latest to master
2018-11-13 10:19:56 -05:00
320 changed files with 7635 additions and 6249 deletions


@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.19.0b1
+current_version = 0.19.0
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)


@@ -2,12 +2,19 @@ version: 2.1
jobs:
unit:
docker: &test_only
- image: fishtownanalytics/test-container:9
- image: fishtownanalytics/test-container:11
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
TOX_PARALLEL_NO_SPINNER: 1
steps:
- checkout
- run: tox -e flake8,mypy,unit-py36,unit-py38
- run: tox -p -e py36,py37,py38
lint:
docker: *test_only
steps:
- checkout
- run: tox -e mypy,flake8 -- -v
build-wheels:
docker: *test_only
steps:
@@ -19,7 +26,7 @@ jobs:
export PYTHON_BIN="${PYTHON_ENV}/bin/python"
$PYTHON_BIN -m pip install -U pip setuptools
$PYTHON_BIN -m pip install -r requirements.txt
$PYTHON_BIN -m pip install -r dev_requirements.txt
$PYTHON_BIN -m pip install -r dev-requirements.txt
/bin/bash ./scripts/build-wheels.sh
$PYTHON_BIN ./scripts/collect-dbt-contexts.py > ./dist/context_metadata.json
$PYTHON_BIN ./scripts/collect-artifact-schema.py > ./dist/artifact_schemas.json
@@ -28,20 +35,22 @@ jobs:
- store_artifacts:
path: ./dist
destination: dist
integration-postgres-py36:
docker: &test_and_postgres
- image: fishtownanalytics/test-container:9
integration-postgres:
docker:
- image: fishtownanalytics/test-container:11
environment:
DBT_INVOCATION_ENV: circle
DOCKER_TEST_DATABASE_HOST: "database"
TOX_PARALLEL_NO_SPINNER: 1
- image: postgres
name: database
environment: &pgenv
environment:
POSTGRES_USER: "root"
POSTGRES_PASSWORD: "password"
POSTGRES_DB: "dbt"
steps:
- checkout
- run: &setupdb
- run:
name: Setup postgres
command: bash test/setup_db.sh
environment:
@@ -50,74 +59,39 @@ jobs:
PGPASSWORD: password
PGDATABASE: postgres
- run:
name: Run tests
command: tox -e integration-postgres-py36
name: Postgres integration tests
command: tox -p -e py36-postgres,py38-postgres -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-snowflake-py36:
integration-snowflake:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py36
no_output_timeout: 1h
name: Snowflake integration tests
command: tox -p -e py36-snowflake,py38-snowflake -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-redshift-py36:
integration-redshift:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py36
name: Redshift integration tests
command: tox -p -e py36-redshift,py38-redshift -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
integration-bigquery-py36:
integration-bigquery:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py36
- store_artifacts:
path: ./logs
integration-postgres-py38:
docker: *test_and_postgres
steps:
- checkout
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py38
- store_artifacts:
path: ./logs
integration-snowflake-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py38
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py38
- store_artifacts:
path: ./logs
integration-bigquery-py38:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py38
name: Bigquery integration test
command: tox -p -e py36-bigquery,py38-bigquery -- -v -n4
no_output_timeout: 30m
- store_artifacts:
path: ./logs
@@ -125,39 +99,25 @@ workflows:
version: 2
test-everything:
jobs:
- lint
- unit
- integration-postgres-py36:
- integration-postgres:
requires:
- unit
- integration-redshift-py36:
requires:
- integration-postgres-py36
- integration-bigquery-py36:
requires:
- integration-postgres-py36
- integration-snowflake-py36:
requires:
- integration-postgres-py36
- integration-postgres-py38:
- integration-redshift:
requires:
- unit
- integration-redshift-py38:
- integration-bigquery:
requires:
- integration-postgres-py38
- integration-bigquery-py38:
- unit
- integration-snowflake:
requires:
- integration-postgres-py38
- integration-snowflake-py38:
requires:
- integration-postgres-py38
- unit
- build-wheels:
requires:
- lint
- unit
- integration-postgres-py36
- integration-redshift-py36
- integration-bigquery-py36
- integration-snowflake-py36
- integration-postgres-py38
- integration-redshift-py38
- integration-bigquery-py38
- integration-snowflake-py38
- integration-postgres
- integration-redshift
- integration-bigquery
- integration-snowflake

.github/dependabot.yml (new file, +45)

@@ -0,0 +1,45 @@
version: 2
updates:
  # python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  - package-ecosystem: "pip"
    directory: "/core"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  - package-ecosystem: "pip"
    directory: "/plugins/bigquery"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  - package-ecosystem: "pip"
    directory: "/plugins/postgres"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  - package-ecosystem: "pip"
    directory: "/plugins/redshift"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  - package-ecosystem: "pip"
    directory: "/plugins/snowflake"
    schedule:
      interval: "daily"
    rebase-strategy: "disabled"
  # docker dependencies
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"
    rebase-strategy: "disabled"
  - package-ecosystem: "docker"
    directory: "/docker"
    schedule:
      interval: "weekly"
    rebase-strategy: "disabled"

.gitignore (3 changed lines)

@@ -8,7 +8,7 @@ __pycache__/
# Distribution / packaging
.Python
env/
env*/
dbt_env/
build/
develop-eggs/
@@ -85,6 +85,7 @@ target/
# pycharm
.idea/
venv/
# AWS credentials
.aws/

ARCHITECTURE.md (new file, +49)

@@ -0,0 +1,49 @@
The core function of dbt is SQL compilation and execution. Users create projects of dbt resources (models, tests, seeds, snapshots, ...), defined in SQL and YAML files, and they invoke dbt to create, update, or query associated views and tables. Today, dbt makes heavy use of Jinja2 to enable the templating of SQL, and to construct a DAG (Directed Acyclic Graph) from all of the resources in a project. Users can also extend their projects by installing resources (including Jinja macros) from other projects, called "packages."
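To make that concrete, here is a minimal sketch of the two ideas (Jinja-templated SQL plus a resource DAG). It is not dbt's actual implementation: the `ref` helper, the `analytics` schema, and the model names are invented for illustration.

```python
# Conceptual sketch only: render one Jinja-templated "model" and record the
# dependency it implies in a networkx DAG.
import jinja2
import networkx as nx

graph = nx.DiGraph()  # the DAG of project resources

def ref(name: str, current_model: str) -> str:
    """Toy stand-in for dbt's {{ ref(...) }}: record an edge, return a relation name."""
    graph.add_edge(name, current_model)
    return f'"analytics"."{name}"'

# A "model" is just a SQL string templated with Jinja
template = jinja2.Template("select * from {{ ref('stg_orders') }} where amount > 0")
compiled_sql = template.render(ref=lambda name: ref(name, current_model="orders"))

print(compiled_sql)                      # select * from "analytics"."stg_orders" where amount > 0
print(list(nx.topological_sort(graph)))  # ['stg_orders', 'orders']
```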
## dbt-core
Most of the python code in the repository is within the `core/dbt` directory. Currently the main subdirectories are:
- [`adapters`](core/dbt/adapters): Define base classes for behavior that is likely to differ across databases
- [`clients`](core/dbt/clients): Interface with dependencies (agate, jinja) or across operating systems
- [`config`](core/dbt/config): Reconcile user-supplied configuration from connection profiles, project files, and Jinja macros
- [`context`](core/dbt/context): Build and expose dbt-specific Jinja functionality
- [`contracts`](core/dbt/contracts): Define Python objects (dataclasses) that dbt expects to create and validate
- [`deps`](core/dbt/deps): Package installation and dependency resolution
- [`graph`](core/dbt/graph): Produce a `networkx` DAG of project resources, and select those resources given user-supplied criteria
- [`include`](core/dbt/include): The dbt "global project," which defines default implementations of Jinja2 macros
- [`parser`](core/dbt/parser): Read project files, validate, construct python objects
- [`rpc`](core/dbt/rpc): Provide remote procedure call server for invoking dbt, following JSON-RPC 2.0 spec
- [`task`](core/dbt/task): Set forth the actions that dbt can perform when invoked
### Invoking dbt
There are two supported ways of invoking dbt: from the command line and using an RPC server.
The "tasks" map to top-level dbt commands. So `dbt run` => task.run.RunTask, etc. Some are more like abstract base classes (GraphRunnableTask, for example) but all the concrete types outside of task/rpc should map to tasks. Currently one executes at a time. The tasks kick off their “Runners” and those do execute in parallel. The parallelism is managed via a thread pool, in GraphRunnableTask.
### core/dbt/include/index.html
This is the docs website code. It comes from the dbt-docs repository, and is generated when a release is packaged.
## Adapters
dbt uses an adapter-plugin pattern to extend support to different databases, warehouses, query engines, etc. The four core adapters in the main repository, contained within the [`plugins`](plugins) subdirectory, are Postgres, Redshift, Snowflake, and BigQuery. Other warehouses use adapter plugins defined in separate repositories (e.g. [dbt-spark](https://github.com/fishtown-analytics/dbt-spark), [dbt-presto](https://github.com/fishtown-analytics/dbt-presto)).
Each adapter is a mix of python, Jinja2, and SQL. The adapter code also makes heavy use of Jinja2 to wrap modular chunks of SQL functionality, define default implementations, and allow plugins to override it.
Each adapter plugin is a standalone python package that includes:
- `dbt/include/[name]`: A "sub-global" dbt project, of YAML and SQL files, that reimplements Jinja macros to use the adapter's supported SQL syntax
- `dbt/adapters/[name]`: Python modules that inherit, and optionally reimplement, the base adapter classes defined in dbt-core
- `setup.py`
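A rough sketch of what that packaging can look like; the adapter name `mydb` and the exact version pins below are placeholders for illustration, not a real plugin:

```python
# setup.py sketch for a hypothetical adapter plugin "dbt-mydb".
from setuptools import find_namespace_packages, setup

setup(
    name="dbt-mydb",
    version="0.20.0",
    description="The mydb adapter plugin for dbt",
    # dbt/adapters/mydb and dbt/include/mydb live under a shared "dbt" namespace
    packages=find_namespace_packages(include=["dbt", "dbt.*"]),
    package_data={
        # ship the "sub-global" project: dbt_project.yml plus SQL macro overrides
        # (nested macro directories would need additional glob patterns)
        "dbt.include.mydb": ["dbt_project.yml", "macros/*.sql"],
    },
    install_requires=["dbt-core==0.20.0"],  # illustrative pin
)
```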
The Postgres adapter code is the most central, and many of its implementations are used as the default defined in the dbt-core global project. The greater the distance of a data technology from Postgres, the more its adapter plugin may need to reimplement.
## Testing dbt
The [`test/`](test/) subdirectory includes unit and integration tests that run as continuous integration checks against open pull requests. Unit tests check mock inputs and outputs of specific python functions. Integration tests perform end-to-end dbt invocations against real adapters (Postgres, Redshift, Snowflake, BigQuery) and assert that the results match expectations. See [the contributing guide](CONTRIBUTING.md) for a step-by-step walkthrough of setting up a local development and testing environment.
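As a toy illustration of the unit-test half of that, a self-contained test in the same spirit; the helper and test names here are made up, not taken from `test/unit/`:

```python
# Made-up helper and test, showing the "known input, assert output" style of
# the unit tests rather than reproducing a real one.
import unittest

def sanitize_label(value: str) -> str:
    # toy stand-in for the kind of small pure function unit tests exercise
    return value.strip().lower().replace(" ", "-")[:63]

class TestSanitizeLabel(unittest.TestCase):
    def test_lowercases_and_hyphenates(self):
        self.assertEqual(sanitize_label("  My Label "), "my-label")

if __name__ == "__main__":
    unittest.main()
```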
## Everything else
- [docker](docker/): All dbt versions are published as Docker images on DockerHub. This subfolder contains the `Dockerfile` (constant) and `requirements.txt` (one for each version).
- [etc](etc/): Images for README
- [scripts](scripts/): Helper scripts for testing, releasing, and producing JSON schemas. These are not included in distributions of dbt, nor are they rigorously tested; they're just handy tools for the dbt maintainers :)


@@ -1,51 +1,194 @@
## dbt 0.19.0 (Release TBD)
## dbt 0.20.0 (Release TBD)
### Fixes
- Fix exit code from dbt debug not returning a failure when one of the tests fail ([#3017](https://github.com/fishtown-analytics/dbt/issues/3017))
- Auto-generated CTEs in tests and ephemeral models have lowercase names to comply with dbt coding conventions ([#3027](https://github.com/fishtown-analytics/dbt/issues/3027), [#3028](https://github.com/fishtown-analytics/dbt/issues/3028))
- Fix incorrect error message when a selector does not match any node ([#3036](https://github.com/fishtown-analytics/dbt/issues/3036))
- Fix variable `_dbt_max_partition` declaration and initialization for BigQuery incremental models ([#2940](https://github.com/fishtown-analytics/dbt/issues/2940), [#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
- Move from 'master' to 'HEAD' as the default branch in git ([#3057](https://github.com/fishtown-analytics/dbt/issues/3057), [#3104](https://github.com/fishtown-analytics/dbt/issues/3104), [#3117](https://github.com/fishtown-analytics/dbt/issues/3117))
- Requirement on `dataclasses` is relaxed to be between `>=0.6,<0.9` allowing dbt to cohabit with other libraries which required higher versions. ([#3150](https://github.com/fishtown-analytics/dbt/issues/3150), [#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
- Add feature to add `_n` alias to same column names in SQL query ([#3147](https://github.com/fishtown-analytics/dbt/issues/3147), [#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
- Raise a proper error message if dbt parses a macro twice due to macro duplication or misconfiguration. ([#2449](https://github.com/fishtown-analytics/dbt/issues/2449), [#3165](https://github.com/fishtown-analytics/dbt/pull/3165))
- Fix exposures missing in graph context variable. ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
- Ensure that schema test macros are properly processed ([#3229](https://github.com/fishtown-analytics/dbt/issues/3229), [#3272](https://github.com/fishtown-analytics/dbt/pull/3272))
### Features
- Added macro get_partitions_metadata(table) to return partition metadata for partitioned table [#2596](https://github.com/fishtown-analytics/dbt/pull/2596)
- Added native python 're' module for regex in jinja templates [#2851](https://github.com/fishtown-analytics/dbt/pull/2851)
- Support commit hashes in dbt deps package revision ([#3268](https://github.com/fishtown-analytics/dbt/issues/3268), [#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
- Add optional configs for `require_partition_filter` and `partition_expiration_days` in BigQuery ([#1843](https://github.com/fishtown-analytics/dbt/issues/1843), [#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
- Fix for EOL SQL comments preventing execution of the entire line ([#2731](https://github.com/fishtown-analytics/dbt/issues/2731), [#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
- Add optional `merge_update_columns` config to specify columns to update for `merge` statements in BigQuery and Snowflake ([#1862](https://github.com/fishtown-analytics/dbt/issues/1862), [#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
- Use query comment JSON as job labels for BigQuery adapter when `query-comment.job-label` is set to `true` ([#2483](https://github.com/fishtown-analytics/dbt/issues/2483), [#3145](https://github.com/fishtown-analytics/dbt/pull/3145))
- Set application_name for Postgres connections ([#885](https://github.com/fishtown-analytics/dbt/issues/885), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
- Support disabling schema tests, and configuring tests from `dbt_project.yml` ([#3252](https://github.com/fishtown-analytics/dbt/issues/3252),
[#3253](https://github.com/fishtown-analytics/dbt/issues/3253), [#3257](https://github.com/fishtown-analytics/dbt/pull/3257))
- Add Jinja tag for tests ([#1173](https://github.com/fishtown-analytics/dbt/issues/1173), [#3261](https://github.com/fishtown-analytics/dbt/pull/3261))
### Under the hood
- Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. ([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062))
- Update script to collect and write json schema for dbt artifacts ([#2870](https://github.com/fishtown-analytics/dbt/issues/2870), [#3065](https://github.com/fishtown-analytics/dbt/pull/3065))
- Relax Google Cloud dependency pins to major versions. ([#3156](https://github.com/fishtown-analytics/dbt/pull/3156))
- Bump `snowflake-connector-python` and related dependencies, support Python 3.9 ([#2985](https://github.com/fishtown-analytics/dbt/issues/2985), [#3148](https://github.com/fishtown-analytics/dbt/pull/3148))
- General development environment clean up and improve experience running tests locally ([#3194](https://github.com/fishtown-analytics/dbt/issues/3194), [#3204](https://github.com/fishtown-analytics/dbt/pull/3204), [#3228](https://github.com/fishtown-analytics/dbt/pull/3228))
- Add a new materialization for tests, update data tests to use test materialization when executing. ([#3154](https://github.com/fishtown-analytics/dbt/issues/3154), [#3181](https://github.com/fishtown-analytics/dbt/pull/3181))
- Switch from externally storing parsing state in ParseResult object to using Manifest ([#3163](https://github.com/fishtown-analytics/dbt/issues/3163), [#3219](https://github.com/fishtown-analytics/dbt/pull/3219))
- Switch from loading project files in separate parsers to loading in one place ([#3244](https://github.com/fishtown-analytics/dbt/issues/3244), [#3248](https://github.com/fishtown-analytics/dbt/pull/3248))
Contributors:
- [@yu-iskw](https://github.com/yu-iskw) ([#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
- [@sdebruyn](https://github.com/sdebruyn) / [@lynxcare](https://github.com/lynxcare) ([#3018](https://github.com/fishtown-analytics/dbt/pull/3018))
- [@rvacaru](https://github.com/rvacaru) ([#2974](https://github.com/fishtown-analytics/dbt/pull/2974))
- [@NiallRees](https://github.com/NiallRees) ([#3028](https://github.com/fishtown-analytics/dbt/pull/3028))
- [ran-eh](https://github.com/ran-eh) ([#3036](https://github.com/fishtown-analytics/dbt/pull/3036))
- [@pcasteran](https://github.com/pcasteran) ([#2976](https://github.com/fishtown-analytics/dbt/pull/2976))
- [@VasiliiSurov](https://github.com/VasiliiSurov) ([#3104](https://github.com/fishtown-analytics/dbt/pull/3104))
- [@jmcarp](https://github.com/jmcarp) ([#3145](https://github.com/fishtown-analytics/dbt/pull/3145))
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#3151](https://github.com/fishtown-analytics/dbt/pull/3151))
- [@max-sixty](https://github.com/max-sixty) ([#3156](https://github.com/fishtown-analytics/dbt/pull/3156))
- [@prratek](https://github.com/prratek) ([#3100](https://github.com/fishtown-analytics/dbt/pull/3100))
- [@techytushar](https://github.com/techytushar) ([#3158](https://github.com/fishtown-analytics/dbt/pull/3158))
- [@cgopalan](https://github.com/cgopalan) ([#3165](https://github.com/fishtown-analytics/dbt/pull/3165), [#3182](https://github.com/fishtown-analytics/dbt/pull/3182))
- [@fuchsst](https://github.com/fuchsst) ([#3241](https://github.com/fishtown-analytics/dbt/issues/3241))
- [@dmateusp](https://github.com/dmateusp) ([#3270](https://github.com/fishtown-analytics/dbt/pull/3270))
## dbt 0.19.1 (March 31, 2021)
## dbt 0.19.1rc2 (March 25, 2021)
### Fixes
- Pass service-account scopes to gcloud-based oauth ([#3040](https://github.com/fishtown-analytics/dbt/issues/3040), [#3041](https://github.com/fishtown-analytics/dbt/pull/3041))
Contributors:
- [@yu-iskw](https://github.com/yu-iskw) ([#3041](https://github.com/fishtown-analytics/dbt/pull/3041))
## dbt 0.19.1rc1 (March 15, 2021)
### Under the hood
- Update code to use Mashumaro 2.0 ([#3138](https://github.com/fishtown-analytics/dbt/pull/3138))
- Pin `agate<1.6.2` to avoid installation errors relating to its new dependency `PyICU` ([#3160](https://github.com/fishtown-analytics/dbt/issues/3160), [#3161](https://github.com/fishtown-analytics/dbt/pull/3161))
- Add an event to track resource counts ([#3050](https://github.com/fishtown-analytics/dbt/issues/3050), [#3157](https://github.com/fishtown-analytics/dbt/pull/3157))
### Fixes
- Fix compiled sql for ephemeral models ([#3139](https://github.com/fishtown-analytics/dbt/pull/3139), [#3056](https://github.com/fishtown-analytics/dbt/pull/3056))
## dbt 0.19.1b2 (February 15, 2021)
## dbt 0.19.1b1 (February 12, 2021)
### Fixes
- On BigQuery, fix regressions for `insert_overwrite` incremental strategy with `int64` and `timestamp` partition columns ([#3063](https://github.com/fishtown-analytics/dbt/issues/3063), [#3095](https://github.com/fishtown-analytics/dbt/issues/3095), [#3098](https://github.com/fishtown-analytics/dbt/issues/3098))
### Under the hood
- Bump werkzeug upper bound dependency to `<v2.0` ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
- Performance fixes for many different things ([#2862](https://github.com/fishtown-analytics/dbt/issues/2862), [#3034](https://github.com/fishtown-analytics/dbt/pull/3034))
Contributors:
- [@Bl3f](https://github.com/Bl3f) ([#3011](https://github.com/fishtown-analytics/dbt/pull/3011))
## dbt 0.19.0 (January 27, 2021)
## dbt 0.19.0rc3 (January 27, 2021)
### Under the hood
- Cleanup docker resources, use single `docker/Dockerfile` for publishing dbt as a docker image ([dbt-release#3](https://github.com/fishtown-analytics/dbt-release/issues/3), [#3019](https://github.com/fishtown-analytics/dbt/pull/3019))
## dbt 0.19.0rc2 (January 14, 2021)
### Fixes
- Fix regression with defining exposures and other resources with the same name ([#2969](https://github.com/fishtown-analytics/dbt/issues/2969), [#3009](https://github.com/fishtown-analytics/dbt/pull/3009))
- Remove ellipses printed while parsing ([#2971](https://github.com/fishtown-analytics/dbt/issues/2971), [#2996](https://github.com/fishtown-analytics/dbt/pull/2996))
### Under the hood
- Rewrite macro for snapshot_merge_sql to make compatible with other SQL dialects ([#3003](https://github.com/fishtown-analytics/dbt/pull/3003))
- Rewrite logic in `snapshot_check_strategy()` to make compatible with other SQL dialects ([#3000](https://github.com/fishtown-analytics/dbt/pull/3000), [#3001](https://github.com/fishtown-analytics/dbt/pull/3001))
- Remove version restrictions on `botocore` ([#3006](https://github.com/fishtown-analytics/dbt/pull/3006))
- Include `exposures` in start-of-invocation stdout summary: `Found ...` ([#3007](https://github.com/fishtown-analytics/dbt/pull/3007), [#3008](https://github.com/fishtown-analytics/dbt/pull/3008))
Contributors:
- [@mikaelene](https://github.com/mikaelene) ([#3003](https://github.com/fishtown-analytics/dbt/pull/3003))
- [@dbeatty10](https://github.com/dbeatty10) ([dbt-adapter-tests#10](https://github.com/fishtown-analytics/dbt-adapter-tests/pull/10))
- [@swanderz](https://github.com/swanderz) ([#3000](https://github.com/fishtown-analytics/dbt/pull/3000))
- [@stpierre](https://github.com/stpierre) ([#3006](https://github.com/fishtown-analytics/dbt/pull/3006))
## dbt 0.19.0rc1 (December 29, 2020)
### Breaking changes
- Defer if and only if upstream reference does not exist in current environment namespace ([#2909](https://github.com/fishtown-analytics/dbt/issues/2909), [#2946](https://github.com/fishtown-analytics/dbt/pull/2946))
- Rationalize run result status reporting and clean up artifact schema ([#2493](https://github.com/fishtown-analytics/dbt/issues/2493), [#2943](https://github.com/fishtown-analytics/dbt/pull/2943))
- Add adapter specific query execution info to run results and source freshness results artifacts. Statement call blocks return `response` instead of `status`, and the adapter method `get_status` is now `get_response` ([#2747](https://github.com/fishtown-analytics/dbt/issues/2747), [#2961](https://github.com/fishtown-analytics/dbt/pull/2961))
### Features
- Added macro `get_partitions_metadata(table)` to return partition metadata for BigQuery partitioned tables ([#2552](https://github.com/fishtown-analytics/dbt/pull/2552), [#2596](https://github.com/fishtown-analytics/dbt/pull/2596))
- Added `--defer` flag for `dbt test` as well ([#2701](https://github.com/fishtown-analytics/dbt/issues/2701), [#2954](https://github.com/fishtown-analytics/dbt/pull/2954))
- Added native python `re` module for regex in jinja templates ([#1755](https://github.com/fishtown-analytics/dbt/issues/1755), [#2851](https://github.com/fishtown-analytics/dbt/pull/2851))
- Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866))
- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-analytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895))
- Hourly, monthly and yearly partitions available in BigQuery ([#2476](https://github.com/fishtown-analytics/dbt/issues/2476), [#2903](https://github.com/fishtown-analytics/dbt/pull/2903))
- Allow BigQuery to default to the environment's default project ([#2828](https://github.com/fishtown-analytics/dbt/pull/2828), [#2908](https://github.com/fishtown-analytics/dbt/pull/2908))
### Fixes
- Respect `--project-dir` in `dbt clean` command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
- Fix Redshift adapter `get_columns_in_relation` macro to push schema filter down to the `svv_external_columns` view ([#2854](https://github.com/fishtown-analytics/dbt/issues/2854), [#2855](https://github.com/fishtown-analytics/dbt/pull/2855))
- Increased the supported relation name length in postgres from 29 to 51 ([#2850](https://github.com/fishtown-analytics/dbt/pull/2850))
- Fix `dbt list` command always returning `0` as exit code ([#2886](https://github.com/fishtown-analytics/dbt/issues/2886), [#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
- Set default `materialized` for test node configs to `test` ([#2806](https://github.com/fishtown-analytics/dbt/issues/2806), [#2902](https://github.com/fishtown-analytics/dbt/pull/2902))
- Allow `docs` blocks in `exposure` descriptions ([#2913](https://github.com/fishtown-analytics/dbt/issues/2913), [#2920](https://github.com/fishtown-analytics/dbt/pull/2920))
- Use original file path instead of absolute path as checksum for big seeds ([#2927](https://github.com/fishtown-analytics/dbt/issues/2927), [#2939](https://github.com/fishtown-analytics/dbt/pull/2939))
- Fix KeyError if deferring to a manifest with a since-deleted source, ephemeral model, or test ([#2875](https://github.com/fishtown-analytics/dbt/issues/2875), [#2958](https://github.com/fishtown-analytics/dbt/pull/2958))
### Under the hood
- Add `unixodbc-dev` package to testing docker image ([#2859](https://github.com/fishtown-analytics/dbt/pull/2859))
- Add event tracking for project parser/load times ([#2823](https://github.com/fishtown-analytics/dbt/issues/2823),[#2893](https://github.com/fishtown-analytics/dbt/pull/2893))
- Bump `cryptography` version to `>= 3.2` and bump snowflake connector to `2.3.6` ([#2896](https://github.com/fishtown-analytics/dbt/issues/2896), [#2922](https://github.com/fishtown-analytics/dbt/issues/2922))
- Widen supported Google Cloud libraries dependencies ([#2794](https://github.com/fishtown-analytics/dbt/pull/2794), [#2877](https://github.com/fishtown-analytics/dbt/pull/2877)).
- Bump `hologram` version to `0.0.11`. Add `scripts/dtr.py` ([#2888](https://github.com/fishtown-analytics/dbt/issues/2840),[#2889](https://github.com/fishtown-analytics/dbt/pull/2889))
- Bump `hologram` version to `0.0.12`. Add testing support for python3.9 ([#2822](https://github.com/fishtown-analytics/dbt/issues/2822),[#2960](https://github.com/fishtown-analytics/dbt/pull/2960))
- Bump the version requirements for `boto3` in dbt-redshift to the upper limit `1.16` to match dbt-redshift and the `snowflake-python-connector` as of version `2.3.6`. ([#2931](https://github.com/fishtown-analytics/dbt/issues/2931), [#2963](https://github.com/fishtown-analytics/dbt/issues/2963))
### Docs
- Fixed issue where data tests with tags were not showing up in graph viz ([docs#147](https://github.com/fishtown-analytics/dbt-docs/issues/147), [docs#157](https://github.com/fishtown-analytics/dbt-docs/pull/157))
Contributors:
- [@feluelle](https://github.com/feluelle) ([#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
- [ran-eh](https://github.com/ran-eh) ([#2596](https://github.com/fishtown-analytics/dbt/pull/2596))
- [@hochoy](https://github.com/hochoy) ([#2851](https://github.com/fishtown-analytics/dbt/pull/2851))
- [@brangisom](https://github.com/brangisom) ([#2855](https://github.com/fishtown-analytics/dbt/pull/2855))
- [@elexisvenator](https://github.com/elexisvenator) ([#2850](https://github.com/fishtown-analytics/dbt/pull/2850))
- [@franloza](https://github.com/franloza) ([#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- [@max-sixty](https://github.com/max-sixty) ([#2877](https://github.com/fishtown-analytics/dbt/pull/2877), [#2908](https://github.com/fishtown-analytics/dbt/pull/2908))
- [@rsella](https://github.com/rsella) ([#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
- [@joellabes](https://github.com/joellabes) ([#2913](https://github.com/fishtown-analytics/dbt/issues/2913))
- [@plotneishestvo](https://github.com/plotneishestvo) ([#2896](https://github.com/fishtown-analytics/dbt/issues/2896))
- [@db-magnus](https://github.com/db-magnus) ([#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
- [@tyang209](https://github.com/tyang209) ([#2931](https://github.com/fishtown-analytics/dbt/issues/2931))
## dbt 0.19.0b1 (October 21, 2020)
### Breaking changes
- The format for `sources.json`, `run-results.json`, `manifest.json`, and `catalog.json` has changed:
- Each now has a common metadata dictionary ([#2761](https://github.com/fishtown-analytics/dbt/issues/2761), [#2778](https://github.com/fishtown-analytics/dbt/pull/2778)). The contents include: schema and dbt versions ([#2670](https://github.com/fishtown-analytics/dbt/issues/2670), [#2767](https://github.com/fishtown-analytics/dbt/pull/2767)); `invocation_id` ([#2763](https://github.com/fishtown-analytics/dbt/issues/2763), [#2784](https://github.com/fishtown-analytics/dbt/pull/2784)); custom environment variables prefixed with `DBT_ENV_CUSTOM_ENV_` ([#2764](https://github.com/fishtown-analytics/dbt/issues/2764), [#2785](https://github.com/fishtown-analytics/dbt/pull/2785)); cli and rpc arguments in the `run_results.json` ([#2510](https://github.com/fishtown-analytics/dbt/issues/2510), [#2813](https://github.com/fishtown-analytics/dbt/pull/2813)).
- Remove `injected_sql` from manifest nodes, use `compiled_sql` instead ([#2762](https://github.com/fishtown-analytics/dbt/issues/2762), [#2834](https://github.com/fishtown-analytics/dbt/pull/2834))
### Features
- dbt will compare configurations using the un-rendered form of the config block in `dbt_project.yml` ([#2713](https://github.com/fishtown-analytics/dbt/issues/2713), [#2735](https://github.com/fishtown-analytics/dbt/pull/2735))
- Added state and defer arguments to the RPC client, matching the CLI ([#2678](https://github.com/fishtown-analytics/dbt/issues/2678), [#2736](https://github.com/fishtown-analytics/dbt/pull/2736))
- Added schema and dbt versions to JSON artifacts ([#2670](https://github.com/fishtown-analytics/dbt/issues/2670), [#2767](https://github.com/fishtown-analytics/dbt/pull/2767))
- Added ability to snapshot hard-deleted records (opt-in with `invalidate_hard_deletes` config option). ([#249](https://github.com/fishtown-analytics/dbt/issues/249), [#2749](https://github.com/fishtown-analytics/dbt/pull/2749))
- Added revival for snapshotting hard-deleted records. ([#2819](https://github.com/fishtown-analytics/dbt/issues/2819), [#2821](https://github.com/fishtown-analytics/dbt/pull/2821))
- Improved error messages for YAML selectors ([#2700](https://github.com/fishtown-analytics/dbt/issues/2700), [#2781](https://github.com/fishtown-analytics/dbt/pull/2781))
- Save cli and rpc arguments in run_results.json ([#2510](https://github.com/fishtown-analytics/dbt/issues/2510), [#2813](https://github.com/fishtown-analytics/dbt/pull/2813))
- Added `dbt_invocation_id` for each BigQuery job to enable performance analysis ([#2808](https://github.com/fishtown-analytics/dbt/issues/2808), [#2809](https://github.com/fishtown-analytics/dbt/pull/2809))
- Added support for BigQuery connections using refresh tokens ([#2344](https://github.com/fishtown-analytics/dbt/issues/2344), [#2805](https://github.com/fishtown-analytics/dbt/pull/2805))
### Under the hood
- Save `manifest.json` at the same time we save the `run_results.json` at the end of a run ([#2765](https://github.com/fishtown-analytics/dbt/issues/2765), [#2799](https://github.com/fishtown-analytics/dbt/pull/2799))
- Added strategy-specific validation to improve the relevancy of compilation errors for the `timestamp` and `check` snapshot strategies. ([#2787](https://github.com/fishtown-analytics/dbt/issues/2787), [#2791](https://github.com/fishtown-analytics/dbt/pull/2791))
- Changed rpc test timeouts to avoid locally run test failures ([#2803](https://github.com/fishtown-analytics/dbt/issues/2803),[#2804](https://github.com/fishtown-analytics/dbt/pull/2804))
- Added a `debug_query` on the base adapter that will allow plugin authors to create custom debug queries ([#2751](https://github.com/fishtown-analytics/dbt/issues/2751), [#2817](https://github.com/fishtown-analytics/dbt/pull/2817))
### Docs
- Add select/deselect option in DAG view dropups. ([docs#98](https://github.com/fishtown-analytics/dbt-docs/issues/98), [docs#138](https://github.com/fishtown-analytics/dbt-docs/pull/138))
Contributors:
- [@Mr-Nobody99](https://github.com/Mr-Nobody99) ([docs#138](https://github.com/fishtown-analytics/dbt-docs/pull/138))
- [@jplynch77](https://github.com/jplynch77) ([docs#139](https://github.com/fishtown-analytics/dbt-docs/pull/139))
## dbt 0.18.2 (March 22, 2021)
## dbt 0.18.2rc1 (March 12, 2021)
### Under the hood
- Pin `agate<1.6.2` to avoid installation errors relating to its new dependency
`PyICU` ([#3160](https://github.com/fishtown-analytics/dbt/issues/3160),
[#3161](https://github.com/fishtown-analytics/dbt/pull/3161))
## dbt 0.18.1 (October 13, 2020)
## dbt 0.18.1rc1 (October 01, 2020)
- Add relevance criteria to site search ([docs#113](https://github.com/fishtown-analytics/dbt-docs/pull/113))
- Support new selector methods, intersection, and arbitrary parent/child depth in DAG selection syntax ([docs#118](https://github.com/fishtown-analytics/dbt-docs/pull/118))
- Revise anonymous event tracking: simpler URL fuzzing; differentiate between Cloud-hosted and non-Cloud docs ([docs#121](https://github.com/fishtown-analytics/dbt-docs/pull/121))
Contributors:
- [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677))
- [@kconvey](https://github.com/kconvey) ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), [#2709](https://github.com/fishtown-analytics/dbt/pull/2709), [#2711](https://github.com/fishtown-analytics/dbt/pull/2711))
Thanks for your contributions to dbt!
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689))
## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)
### Overview

# Contributing to `dbt`
1. [About this document](#about-this-document)
2. [Proposing a change](#proposing-a-change)
3. [Getting the code](#getting-the-code)
4. [Setting up an environment](#setting-up-an-environment)
5. [Running `dbt` in development](#running-dbt-in-development)
6. [Testing](#testing)
7. [Submitting a Pull Request](#submitting-a-pull-request)
## About this document
This document is a guide intended for folks interested in contributing to `dbt`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
If you're new to python development or contributing to open-source software, we encourage you to read this document from start to finish. If you get stuck, drop us a line in the `#dbt-core-development` channel on [slack](https://community.getdbt.com).
### Signing the CLA
Please note that all contributors to `dbt` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt` codebase. If you are unable to sign the CLA, then the `dbt` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
## Proposing a change
`dbt` is Apache 2.0-licensed open source software. `dbt` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
### Defining the problem
If you have an idea for a new feature or if you've discovered a bug in `dbt`, the first step is to open an issue. Please check the list of [open issues](https://github.com/fishtown-analytics/dbt/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
> **Note:** All community-contributed Pull Requests _must_ be associated with an open issue. If you submit a Pull Request that does not pertain to an open issue, you will be asked to create an issue describing the problem before the Pull Request can be reviewed.
### Discussing the idea
After you open an issue, a `dbt` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
### Submitting a change
If an issue is appropriately well scoped and describes a beneficial change to the `dbt` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.
The `dbt` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/fishtown-analytics/dbt/contribute) page.
Here's a good workflow:
- Comment on the open issue, expressing your interest in contributing the required code change
- Outline your planned implementation. If you want help getting started, ask!
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended, and that it avoids any bugs previously encountered. Currently, the best resource for understanding `dbt`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.
In some cases, the right resolution to an open issue might be tangential to the `dbt` codebase. The right path forward might be a documentation update or a change that can be made in user-space. In other cases, the issue might describe functionality that the `dbt` maintainers are unwilling or unable to incorporate into the `dbt` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt` repository, the issue will be tagged with the `wontfix` label (see below) and closed.
### Using issue labels
The `dbt` maintainers use labels to categorize open issues. Some labels indicate the databases impacted by the issue, while others describe the domain in the `dbt` codebase germane to the discussion. While most of these labels are self-explanatory (eg. `snowflake` or `bigquery`), there are others that are worth describing.
| tag | description |
| --- | ----------- |
| [triage](https://github.com/fishtown-analytics/dbt/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/fishtown-analytics/dbt/labels/bug) | This issue represents a defect or regression in `dbt` |
| [enhancement](https://github.com/fishtown-analytics/dbt/labels/enhancement) | This issue represents net-new functionality in `dbt` |
| [good first issue](https://github.com/fishtown-analytics/dbt/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt` codebase to implement. This issue is appropriate for a first-time contributor. |
| [help wanted](https://github.com/fishtown-analytics/dbt/labels/help%20wanted) / [discussion](https://github.com/fishtown-analytics/dbt/labels/discussion) | Conversation around this issue is ongoing, and there isn't yet a clear path forward. Input from community members is most welcome. |
| [duplicate](https://github.com/fishtown-analytics/dbt/labels/duplicate) | This issue is functionally identical to another open issue. The `dbt` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/fishtown-analytics/dbt/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/fishtown-analytics/dbt/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/fishtown-analytics/dbt/labels/wontfix) | This issue does not require a code change in the `dbt` repository, or the maintainers are unwilling/unable to merge a Pull Request which implements the behavior described in the issue. |
#### Branching Strategy
`dbt` has three types of branches:
- **Trunks** are where active development of the next release takes place. There is one trunk, named `develop` at the time of writing, and it is the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk branch or a specific release branch.
## Getting the code
### Installing git
You will need `git` in order to download and modify the `dbt` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).
### External contributors
If you are not a member of the `fishtown-analytics` GitHub organization, you can contribute to `dbt` by forking the `dbt` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
1. fork the `dbt` repository
2. clone your fork locally
3. check out a new branch for your proposed changes
4. push changes to your fork
### Core contributors
If you are a member of the `fishtown-analytics` GitHub organization, you will have push access to the `dbt` repo. Rather than forking `dbt` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
## Setting up an environment
There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt` development, many of these tools are used commonly across open-source python projects.
### Tools
A short list of tools used in `dbt` testing that will be helpful to your understanding:
- [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage dependencies
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.6, Python 3.7, Python 3.8, and Python 3.9
- [`pytest`](https://docs.pytest.org/en/latest/) to discover/run tests
- [`make`](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [`flake8`](https://flake8.pycqa.org/en/latest/) for code linting
- [`mypy`](https://mypy.readthedocs.io/en/stable/) for static type checking
- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/)
A deep understanding of these tools is not required to effectively contribute to `dbt`, but we recommend checking out the attached documentation if you're interested in learning more about them.
#### virtual environments
We strongly recommend using virtual environments when developing code in `dbt`. We recommend creating this virtualenv
in the root of the `dbt` repository. To create a new virtualenv, run:
```sh
python3 -m venv env
source env/bin/activate
```
This will create and activate a new Python virtual environment.
#### docker and docker-compose
Docker and docker-compose are both used in testing. Specific instructions for your OS can be found [here](https://docs.docker.com/get-docker/).
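If you want to sanity-check your setup before running anything heavier, a quick check like the following should be enough (standard Docker commands, nothing specific to this repo):
```sh
# Confirm both tools are installed and the Docker daemon is reachable.
docker --version
docker-compose --version
docker info > /dev/null && echo "Docker daemon is up"
```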
#### postgres (optional)
For testing, and later in the examples in this document, you may want to have `psql` available so you can poke around in the database and see what happened. We recommend that you use [homebrew](https://brew.sh/) for that on macOS, and your package manager on Linux. You can install any version of the postgres client that you'd like. On macOS, with homebrew setup, you can run:
```sh
brew install postgresql
```
## Running `dbt` in development
### Installation
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Next, install `dbt` (and its dependencies) with:
```sh
make dev
# or
pip install -r dev-requirements.txt -r editable-requirements.txt
```
When `dbt` is installed this way, any changes you make to the `dbt` source code will be reflected immediately in your next `dbt` run.
### Running `dbt`
With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
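For example, with the virtualenv active you might see something like this (the exact path will depend on where you cloned the repo):
```sh
# The dbt executable should live inside your virtualenv, not in a system path.
which dbt        # e.g. /path/to/dbt/env/bin/dbt
dbt --version    # reports the version of your source checkout
```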
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as needed to connect to your target database.
## Testing
Getting the `dbt` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt`. The section that follows outlines some helpful tips for setting up the test environment.
Since `dbt` works with a number of different databases, you will need to supply credentials for one or more of these databases in your test environment. Most organizations don't have access to each of a BigQuery, Redshift, Snowflake, and Postgres database, so it's likely that you will be unable to run every integration test locally. Fortunately, Fishtown Analytics provides a CI environment with access to sandboxed Redshift, Snowflake, BigQuery, and Postgres databases. See the section on [_Submitting a Pull Request_](#submitting-a-pull-request) below for more information on this CI setup.
### Initial setup
We recommend starting with `dbt`'s Postgres tests. These tests cover most of the functionality in `dbt`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
```sh
make setup-db
```
or, alternatively:
```sh
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```
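If you want to confirm the test database came up correctly, a quick check with `psql` using the same credentials as above should work (assuming you installed the postgres client earlier):
```sh
# Connect to the dockerized test database and run a trivial query.
PGHOST=localhost PGUSER=root PGPASSWORD=password psql -d postgres -c 'select 1;'
```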
`dbt` uses test credentials specified in a `test.env` file in the root of the repository for non-Postgres databases. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against `dbt`. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials. This step is only required to use non-Postgres databases.
```
cp test.env.sample test.env
$EDITOR test.env
```
> In general, it's most important to have successful unit and Postgres tests. Once you open a PR, `dbt` will automatically run integration tests for the other three core database adapters. Of course, if you are a BigQuery user, contributing a BigQuery-only feature, it's important to run BigQuery tests as well.
### Test commands
There are a few methods for running tests locally.
#### Makefile
There are multiple targets in the Makefile to run common test suites and code
checks, most notably:
```sh
# Runs unit tests with py38 and code checks in parallel.
make test
# Runs postgres integration tests with py38 in "fail fast" mode.
make integration
```
> These make targets assume you have a recent version of [`tox`](https://tox.readthedocs.io/en/latest/) installed locally,
> unless you choose a Docker container to run tests. Run `make help` for more info.
Check out the other targets in the Makefile to see other commonly used test
suites.
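For example, `make help` lists every documented target, and any target can be run inside the docker-compose `test` container with the optional `USE_DOCKER` flag described in the Makefile:
```sh
# Show all documented make targets.
make help
# Run the postgres integration suite inside a Docker container instead of locally.
make integration USE_DOCKER=true
```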
#### `tox`
[`tox`](https://tox.readthedocs.io/en/latest/) takes care of managing virtualenvs and installing dependencies in order to run
tests. You can also run tests in parallel, for example, you can run unit tests
for Python 3.6, Python 3.7, Python 3.8, `flake8` checks, and `mypy` checks in
parallel with `tox -p`. Also, you can run unit tests for specific python versions
with `tox -e py36`. The configuration for these tests is located in `tox.ini`.
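As a rough sketch (environment names here are taken from the paragraph above and the Makefile; `tox.ini` is the authoritative list):
```sh
# Unit tests for a single Python version.
tox -e py36
# Several unit test environments plus flake8 and mypy checks, in parallel.
tox -p -e py36,py37,py38,flake8,mypy
# Postgres integration tests under py38, stopping at the first failure.
tox -e py38-postgres -- -x
```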
#### `pytest`
Finally, you can also run a specific test or group of tests using [`pytest`](https://docs.pytest.org/en/latest/) directly. With a virtualenv
active and dev dependencies installed you can do things like:
```sh
# run specific postgres integration tests
python -m pytest -m profile_postgres test/integration/001_simple_copy_test
# run all unit tests in a file
python -m pytest test/unit/test_graph.py
# run a specific unit test
python -m pytest test/unit/test_graph.py::GraphTest::test__dependency_list
```
> [Here](https://docs.pytest.org/en/reorganize-docs/new-docs/user/commandlineuseful.html)
> is a list of useful command-line options for `pytest` to use while developing.
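A few generally useful `pytest` flags while iterating (these are standard pytest options, not specific to this repo):
```sh
# Stop at the first failure and show print/log output.
python -m pytest -x -s test/unit/test_graph.py
# Run only tests whose names match a keyword expression.
python -m pytest -k "dependency" test/unit
# Drop into the debugger when a test fails.
python -m pytest --pdb test/unit/test_graph.py
```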
## Submitting a Pull Request
Fishtown Analytics provides a sandboxed Redshift, Snowflake, and BigQuery database for use in a CI environment. When pull requests are submitted to the `fishtown-analytics/dbt` repo, GitHub will trigger automated tests in CircleCI and Azure Pipelines.
A `dbt` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
Once all tests are passing and your PR has been approved, a `dbt` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:


RUN curl -LO https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_V
&& tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
&& rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
RUN pip3 install -U "tox==3.14.4" wheel "six>=1.14.0,<1.15.0" "virtualenv==20.0.3" setuptools
# tox fails if the 'python' interpreter (python2) doesn't have `tox` installed
RUN pip install -U "tox==3.14.4" "six>=1.14.0,<1.15.0" "virtualenv==20.0.3" setuptools
RUN pip3 install -U tox wheel six setuptools
# These args are passed in via docker-compose, which reads them from the .env file.
# On Linux, run `make .env` to create the .env file for the current user.

Makefile

.DEFAULT_GOAL:=help
# Optional flag to run target in a docker container.
# (example `make test USE_DOCKER=true`)
ifeq ($(USE_DOCKER),true)
DOCKER_CMD := docker-compose run --rm test
endif
.PHONY: dev
dev: ## Installs dbt-* packages in develop mode along with development dependencies.
pip install -r dev-requirements.txt -r editable-requirements.txt
.PHONY: mypy
mypy: .env ## Runs mypy for static type checking.
$(DOCKER_CMD) tox -e mypy
.PHONY: flake8
flake8: .env ## Runs flake8 to enforce style guide.
$(DOCKER_CMD) tox -e flake8
.PHONY: lint
lint: .env ## Runs all code checks in parallel.
$(DOCKER_CMD) tox -p -e flake8,mypy
.PHONY: unit
unit: .env ## Runs unit tests with py38.
$(DOCKER_CMD) tox -e py38
.PHONY: test
test: .env ## Runs unit tests with py38 and code checks in parallel.
$(DOCKER_CMD) tox -p -e py38,flake8,mypy
.PHONY: integration
integration: .env integration-postgres ## Alias for integration-postgres.
.PHONY: integration-fail-fast
integration-fail-fast: .env integration-postgres-fail-fast ## Alias for integration-postgres-fail-fast.
.PHONY: integration-postgres
integration-postgres: .env ## Runs postgres integration tests with py38.
$(DOCKER_CMD) tox -e py38-postgres -- -nauto
.PHONY: integration-postgres-fail-fast
integration-postgres-fail-fast: .env ## Runs postgres integration tests with py38 in "fail fast" mode.
$(DOCKER_CMD) tox -e py38-postgres -- -x -nauto
.PHONY: integration-redshift
integration-redshift: .env ## Runs redshift integration tests with py38.
$(DOCKER_CMD) tox -e py38-redshift -- -nauto
.PHONY: integration-redshift-fail-fast
integration-redshift-fail-fast: .env ## Runs redshift integration tests with py38 in "fail fast" mode.
$(DOCKER_CMD) tox -e py38-redshift -- -x -nauto
.PHONY: integration-snowflake
integration-snowflake: .env ## Runs snowflake integration tests with py38.
$(DOCKER_CMD) tox -e py38-snowflake -- -nauto
.PHONY: integration-snowflake-fail-fast
integration-snowflake-fail-fast: .env ## Runs snowflake integration tests with py38 in "fail fast" mode.
$(DOCKER_CMD) tox -e py38-snowflake -- -x -nauto
.PHONY: integration-bigquery
integration-bigquery: .env ## Runs bigquery integration tests with py38.
$(DOCKER_CMD) tox -e py38-bigquery -- -nauto
.PHONY: integration-bigquery-fail-fast
integration-bigquery-fail-fast: .env ## Runs bigquery integration tests with py38 in "fail fast" mode.
$(DOCKER_CMD) tox -e py38-bigquery -- -x -nauto
.PHONY: setup-db
setup-db: ## Setup Postgres database with docker-compose for system testing.
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
# This rule creates a file named .env that is used by docker-compose for passing
# the USER_ID and GROUP_ID arguments to the Docker image.
.env:
.env: ## Setup step for using docker-compose with make targets.
@touch .env
ifneq ($(OS),Windows_NT)
ifneq ($(shell uname -s), Darwin)
@@ -31,9 +83,9 @@ ifneq ($(shell uname -s), Darwin)
@echo GROUP_ID=$(shell id -g) >> .env
endif
endif
@time docker-compose build
clean:
.PHONY: clean
clean: ## Resets development environment.
rm -f .coverage
rm -rf .eggs/
rm -f .env
@@ -47,3 +99,14 @@ clean:
rm -rf target/
find . -type f -name '*.pyc' -delete
find . -type d -name '__pycache__' -depth -delete
.PHONY: help
help: ## Show this help message.
@echo 'usage: make [target] [USE_DOCKER=true]'
@echo
@echo 'targets:'
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
@echo
@echo 'options:'
@echo 'use USE_DOCKER=true to run target in a docker container'

View File

@@ -1,5 +1,5 @@
<p align="center">
<img src="/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
<img src="https://raw.githubusercontent.com/fishtown-analytics/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
</p>
<p align="center">
<a href="https://codeclimate.com/github/fishtown-analytics/dbt">
@@ -20,7 +20,7 @@
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.
![dbt architecture](https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-arch.png?raw=true)
![dbt architecture](https://raw.githubusercontent.com/fishtown-analytics/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-arch.png)
dbt can be used to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
@@ -30,7 +30,7 @@ Analysts using dbt can transform their data by simply writing select statements,
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
![dbt dag](https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-dag.png?raw=true)
![dbt dag](https://raw.githubusercontent.com/fishtown-analytics/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-dag.png)
## Getting started
@@ -51,7 +51,7 @@ These select statements, or "models", form a dbt project. Models frequently buil
## Reporting bugs and contributing code
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt/issues/new).
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](/CONTRIBUTING.md)
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](https://github.com/fishtown-analytics/dbt/blob/HEAD/CONTRIBUTING.md)
## Code of Conduct

View File

@@ -1,92 +0,0 @@
### Release Procedure :shipit:
#### Branching Strategy
dbt has three types of branches:
- **Trunks** track the latest release of a minor version of dbt. Historically, we used the `master` branch as the trunk. Each minor version release has a corresponding trunk. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of dbt.
- **Release Branches** track a specific, not yet complete release of dbt. These releases are codenamed since we don't always know what their semantic version will be. Example: `dev/lucretia-mott` became `0.11.1`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into a release branch.
#### Git & PyPI
1. Update CHANGELOG.md with the most recent changes
2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in GitHub to merge it into the appropriate trunk (`X.X.latest`)
3. Bump the version using `bumpversion`:
- Dry run first by running `bumpversion --new-version <desired-version> <part>` and checking the diff. If it looks correct, clean up the changes and move on:
- Alpha releases: `bumpversion --commit --no-tag --new-version 0.10.2a1 num`
- Patch releases: `bumpversion --commit --no-tag --new-version 0.10.2 patch`
- Minor releases: `bumpversion --commit --no-tag --new-version 0.11.0 minor`
- Major releases: `bumpversion --commit --no-tag --new-version 1.0.0 major`
4. (If this is not a release candidate) Merge to `x.x.latest` and (optionally) `master`.
5. Update the default branch to the next dev release branch.
6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!!
7. Deploy to pypi
- `twine upload dist/*`
8. Deploy to homebrew (see below)
9. Deploy to conda-forge (see below)
10. Git release notes (points to changelog)
11. Post to slack (point to changelog)
After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward.
In some cases, where the branches have diverged wildly, it's ok to skip this step. But this means that the changes you just released won't be included in future releases.
#### Homebrew Release Process
1. Clone the `homebrew-dbt` repository:
```
git clone git@github.com:fishtown-analytics/homebrew-dbt.git
```
2. For ALL releases (prereleases and version releases), copy the relevant formula. To copy from the latest version release of dbt, do:
```bash
cp Formula/dbt.rb Formula/dbt@{NEW-VERSION}.rb
```
To copy from a different version, simply copy the corresponding file.
3. Open the file, and edit the following:
- the name of the Ruby class: this is important; homebrew won't function properly if the class name is wrong. Check historical versions to figure out the right name.
- under the `bottle` section, remove all of the hashes (lines starting with `sha256`)
4. Create a **Python 3.7** virtualenv, activate it, and then install two packages: `homebrew-pypi-poet`, and the version of dbt you are preparing. I use:
```
pyenv virtualenv 3.7.0 homebrew-dbt-{VERSION}
pyenv activate homebrew-dbt-{VERSION}
pip install dbt=={VERSION} homebrew-pypi-poet
```
homebrew-pypi-poet is a program that generates a valid homebrew formula for an installed pip package. You want to use it to generate a diff against the existing formula. Then you want to apply the diff for the dependency packages only -- e.g. it will tell you that `google-api-core` has been updated and that you need to use the latest version.
5. Reinstall, test, and audit dbt. If the test or audit fails, fix the formula and start again from step 1.
```bash
brew uninstall --force Formula/{YOUR-FILE}.rb
brew install Formula/{YOUR-FILE}.rb
brew test dbt
brew audit --strict dbt
```
6. Ask Connor to bottle the change (only his laptop can do it!)
#### Conda Forge Release Process
1. Clone the fork of `conda-forge/dbt-feedstock` [here](https://github.com/fishtown-analytics/dbt-feedstock)
```bash
git clone git@github.com:fishtown-analytics/dbt-feedstock.git
```
2. Update the version and sha256 in `recipe/meta.yml`. To calculate the sha256, run:
```bash
wget https://github.com/fishtown-analytics/dbt/archive/v{version}.tar.gz
openssl sha256 v{version}.tar.gz
```
3. Push the changes and create a PR against `conda-forge/dbt-feedstock`
4. Confirm that all automated conda-forge tests are passing

View File

@@ -6,8 +6,8 @@
trigger:
branches:
include:
- master
- dev/*
- develop
- '*.latest'
- pr/*
jobs:
@@ -23,7 +23,7 @@ jobs:
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-unit
- script: python -m tox -e py -- -v
displayName: Run unit tests
- job: PostgresIntegrationTest
@@ -54,7 +54,7 @@ jobs:
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-postgres
- script: python -m tox -e py-postgres -- -v -n4
displayName: Run integration tests
# These three are all similar except secure environment variables, which MUST be passed along to their tasks,
@@ -73,7 +73,7 @@ jobs:
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-snowflake
- script: python -m tox -e py-snowflake -- -v -n4
env:
SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT)
SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD)
@@ -96,7 +96,7 @@ jobs:
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-bigquery
- script: python -m tox -e py-bigquery -- -v -n4
env:
BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON)
displayName: Run integration tests
@@ -115,7 +115,7 @@ jobs:
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-redshift
- script: python -m tox -e py-redshift -- -v -n4
env:
REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME)
REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS)
@@ -139,7 +139,7 @@ jobs:
inputs:
versionSpec: '3.7'
architecture: 'x64'
- script: python -m pip install --upgrade pip setuptools && python -m pip install -r requirements.txt && python -m pip install -r dev_requirements.txt
- script: python -m pip install --upgrade pip setuptools && python -m pip install -r requirements.txt && python -m pip install -r dev-requirements.txt
displayName: Install dependencies
- task: ShellScript@2
inputs:

View File

@@ -1,14 +1,12 @@
from dataclasses import dataclass
import re
from hologram import JsonSchemaMixin
from dbt.exceptions import RuntimeException
from typing import Dict, ClassVar, Any, Optional
from dbt.exceptions import RuntimeException
@dataclass
class Column(JsonSchemaMixin):
class Column:
TYPE_LABELS: ClassVar[Dict[str, str]] = {
'STRING': 'TEXT',
'TIMESTAMP': 'TIMESTAMP',

View File

@@ -4,14 +4,15 @@ import os
from multiprocessing.synchronize import RLock
from threading import get_ident
from typing import (
Dict, Tuple, Hashable, Optional, ContextManager, List
Dict, Tuple, Hashable, Optional, ContextManager, List, Union
)
import agate
import dbt.exceptions
from dbt.contracts.connection import (
Connection, Identifier, ConnectionState, AdapterRequiredConfig, LazyHandle
Connection, Identifier, ConnectionState,
AdapterRequiredConfig, LazyHandle, AdapterResponse
)
from dbt.contracts.graph.manifest import Manifest
from dbt.adapters.base.query_headers import (
@@ -290,7 +291,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
@abc.abstractmethod
def execute(
self, sql: str, auto_begin: bool = False, fetch: bool = False
) -> Tuple[str, agate.Table]:
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
"""Execute the given SQL.
:param str sql: The sql to execute.
@@ -298,7 +299,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
transaction, automatically begin one.
:param bool fetch: If set, fetch results.
:return: A tuple of the status and the results (empty if fetch=False).
:rtype: Tuple[str, agate.Table]
:rtype: Tuple[Union[str, AdapterResponse], agate.Table]
"""
raise dbt.exceptions.NotImplementedException(
'`execute` is not implemented for this adapter!'

View File

@@ -28,14 +28,14 @@ from dbt.clients.jinja import MacroGenerator
from dbt.contracts.graph.compiled import (
CompileResultNode, CompiledSeedNode
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.manifest import Manifest, MacroManifest
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.exceptions import warn_or_error
from dbt.node_types import NodeType
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import filter_null_values, executor
from dbt.adapters.base.connections import Connection
from dbt.adapters.base.connections import Connection, AdapterResponse
from dbt.adapters.base.meta import AdapterMeta, available
from dbt.adapters.base.relation import (
ComponentName, BaseRelation, InformationSchema, SchemaSearchMap
@@ -160,7 +160,7 @@ class BaseAdapter(metaclass=AdapterMeta):
self.config = config
self.cache = RelationsCache()
self.connections = self.ConnectionManager(config)
self._macro_manifest_lazy: Optional[Manifest] = None
self._macro_manifest_lazy: Optional[MacroManifest] = None
###
# Methods that pass through to the connection manager
@@ -213,7 +213,7 @@ class BaseAdapter(metaclass=AdapterMeta):
@available.parse(lambda *a, **k: ('', empty_table()))
def execute(
self, sql: str, auto_begin: bool = False, fetch: bool = False
) -> Tuple[str, agate.Table]:
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
"""Execute the given SQL. This is a thin wrapper around
ConnectionManager.execute.
@@ -222,7 +222,7 @@ class BaseAdapter(metaclass=AdapterMeta):
transaction, automatically begin one.
:param bool fetch: If set, fetch results.
:return: A tuple of the status and the results (empty if fetch=False).
:rtype: Tuple[str, agate.Table]
:rtype: Tuple[Union[str, AdapterResponse], agate.Table]
"""
return self.connections.execute(
sql=sql,
@@ -259,22 +259,22 @@ class BaseAdapter(metaclass=AdapterMeta):
return cls.ConnectionManager.TYPE
@property
def _macro_manifest(self) -> Manifest:
def _macro_manifest(self) -> MacroManifest:
if self._macro_manifest_lazy is None:
return self.load_macro_manifest()
return self._macro_manifest_lazy
def check_macro_manifest(self) -> Optional[Manifest]:
def check_macro_manifest(self) -> Optional[MacroManifest]:
"""Return the internal manifest (used for executing macros) if it's
been initialized, otherwise return None.
"""
return self._macro_manifest_lazy
def load_macro_manifest(self) -> Manifest:
def load_macro_manifest(self) -> MacroManifest:
if self._macro_manifest_lazy is None:
# avoid a circular import
from dbt.parser.manifest import load_macro_manifest
manifest = load_macro_manifest(
from dbt.parser.manifest import ManifestLoader
manifest = ManifestLoader.load_macros(
self.config, self.connections.set_query_header
)
self._macro_manifest_lazy = manifest

View File

@@ -21,8 +21,8 @@ Self = TypeVar('Self', bound='BaseRelation')
@dataclass(frozen=True, eq=False, repr=False)
class BaseRelation(FakeAPIObject, Hashable):
type: Optional[RelationType]
path: Path
type: Optional[RelationType] = None
quote_character: str = '"'
include_policy: Policy = Policy()
quote_policy: Policy = Policy()
@@ -45,7 +45,7 @@ class BaseRelation(FakeAPIObject, Hashable):
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return self.to_dict() == other.to_dict()
return self.to_dict(omit_none=True) == other.to_dict(omit_none=True)
@classmethod
def get_default_quote_policy(cls) -> Policy:
@@ -185,10 +185,10 @@ class BaseRelation(FakeAPIObject, Hashable):
def create_from_source(
cls: Type[Self], source: ParsedSourceDefinition, **kwargs: Any
) -> Self:
source_quoting = source.quoting.to_dict()
source_quoting = source.quoting.to_dict(omit_none=True)
source_quoting.pop('column', None)
quote_policy = deep_merge(
cls.get_default_quote_policy().to_dict(),
cls.get_default_quote_policy().to_dict(omit_none=True),
source_quoting,
kwargs.get('quote_policy', {}),
)
@@ -203,7 +203,7 @@ class BaseRelation(FakeAPIObject, Hashable):
@staticmethod
def add_ephemeral_prefix(name: str):
return f'__dbt__CTE__{name}'
return f'__dbt__cte__{name}'
@classmethod
def create_ephemeral_from_node(

View File

@@ -7,7 +7,9 @@ from typing_extensions import Protocol
import agate
from dbt.contracts.connection import Connection, AdapterRequiredConfig
from dbt.contracts.connection import (
Connection, AdapterRequiredConfig, AdapterResponse
)
from dbt.contracts.graph.compiled import (
CompiledNode, ManifestNode, NonSourceCompiledNode
)
@@ -154,7 +156,7 @@ class AdapterProtocol(
def execute(
self, sql: str, auto_begin: bool = False, fetch: bool = False
) -> Tuple[str, agate.Table]:
) -> Tuple[Union[str, AdapterResponse], agate.Table]:
...
def get_compiler(self) -> Compiler_T:

View File

@@ -1,13 +1,15 @@
import abc
import time
from typing import List, Optional, Tuple, Any, Iterable, Dict
from typing import List, Optional, Tuple, Any, Iterable, Dict, Union
import agate
import dbt.clients.agate_helper
import dbt.exceptions
from dbt.adapters.base import BaseConnectionManager
from dbt.contracts.connection import Connection, ConnectionState
from dbt.contracts.connection import (
Connection, ConnectionState, AdapterResponse
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import flags
@@ -18,7 +20,7 @@ class SQLConnectionManager(BaseConnectionManager):
Methods to implement:
- exception_handler
- cancel
- get_status
- get_response
- open
"""
@abc.abstractmethod
@@ -76,20 +78,19 @@ class SQLConnectionManager(BaseConnectionManager):
cursor = connection.handle.cursor()
cursor.execute(sql, bindings)
logger.debug(
"SQL status: {status} in {elapsed:0.2f} seconds",
status=self.get_status(cursor),
status=self.get_response(cursor),
elapsed=(time.time() - pre)
)
return connection, cursor
@abc.abstractclassmethod
def get_status(cls, cursor: Any) -> str:
def get_response(cls, cursor: Any) -> Union[AdapterResponse, str]:
"""Get the status of the cursor."""
raise dbt.exceptions.NotImplementedException(
'`get_status` is not implemented for this adapter!'
'`get_response` is not implemented for this adapter!'
)
@classmethod
@@ -98,7 +99,14 @@ class SQLConnectionManager(BaseConnectionManager):
column_names: Iterable[str],
rows: Iterable[Any]
) -> List[Dict[str, Any]]:
unique_col_names = dict()
for idx in range(len(column_names)):
col_name = column_names[idx]
if col_name in unique_col_names:
unique_col_names[col_name] += 1
column_names[idx] = f'{col_name}_{unique_col_names[col_name]}'
else:
unique_col_names[column_names[idx]] = 1
return [dict(zip(column_names, row)) for row in rows]
@classmethod
@@ -118,15 +126,15 @@ class SQLConnectionManager(BaseConnectionManager):
def execute(
self, sql: str, auto_begin: bool = False, fetch: bool = False
) -> Tuple[str, agate.Table]:
) -> Tuple[Union[AdapterResponse, str], agate.Table]:
sql = self._add_query_comment(sql)
_, cursor = self.add_query(sql, auto_begin)
status = self.get_status(cursor)
response = self.get_response(cursor)
if fetch:
table = self.get_result_from_cursor(cursor)
else:
table = dbt.clients.agate_helper.empty_table()
return status, table
return response, table
def add_begin_query(self):
return self.add_query('BEGIN', auto_begin=False)
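
To make the new return type concrete: adapters now hand back either a plain status string or a structured response object from `get_response`/`execute`. A hedged sketch with a stand-in dataclass (the real `AdapterResponse` lives in `dbt.contracts.connection` and its exact fields may differ):

```python
from dataclasses import dataclass
from typing import Optional, Union


@dataclass
class FakeAdapterResponse:
    # Stand-in for dbt.contracts.connection.AdapterResponse
    message: str
    code: Optional[str] = None
    rows_affected: Optional[int] = None


def get_response(cursor) -> Union[FakeAdapterResponse, str]:
    # Newer adapters can report structured metadata read from the cursor;
    # an adapter that only knows a status string may still return one.
    rowcount = getattr(cursor, "rowcount", -1)
    if rowcount >= 0:
        return FakeAdapterResponse(message=f"OK {rowcount}", rows_affected=rowcount)
    return "OK"
```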

View File

@@ -6,11 +6,19 @@ from dbt.logger import GLOBAL_LOGGER as logger
import dbt.exceptions
def clone(repo, cwd, dirname=None, remove_git_dir=False, branch=None):
def _is_commit(revision: str) -> bool:
# match SHA-1 git commit
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None):
has_revision = revision is not None
is_commit = _is_commit(revision or "")
clone_cmd = ['git', 'clone', '--depth', '1']
if branch is not None:
clone_cmd.extend(['--branch', branch])
if has_revision and not is_commit:
clone_cmd.extend(['--branch', revision])
clone_cmd.append(repo)
@@ -31,33 +39,38 @@ def list_tags(cwd):
return tags
def _checkout(cwd, repo, branch):
logger.debug(' Checking out branch {}.'.format(branch))
def _checkout(cwd, repo, revision):
logger.debug(' Checking out revision {}.'.format(revision))
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch])
run_cmd(cwd, ['git', 'fetch', '--tags', '--depth', '1', 'origin', branch])
fetch_cmd = ["git", "fetch", "origin", "--depth", "1"]
tags = list_tags(cwd)
# Prefer tags to branches if one exists
if branch in tags:
spec = 'tags/{}'.format(branch)
if _is_commit(revision):
run_cmd(cwd, fetch_cmd + [revision])
else:
spec = 'origin/{}'.format(branch)
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', revision])
run_cmd(cwd, fetch_cmd + ["--tags", revision])
if _is_commit(revision):
spec = revision
# Prefer tags to branches if one exists
elif revision in list_tags(cwd):
spec = 'tags/{}'.format(revision)
else:
spec = 'origin/{}'.format(revision)
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec],
env={'LC_ALL': 'C'})
return out, err
def checkout(cwd, repo, branch=None):
if branch is None:
branch = 'master'
def checkout(cwd, repo, revision=None):
if revision is None:
revision = 'HEAD'
try:
return _checkout(cwd, repo, branch)
return _checkout(cwd, repo, revision)
except dbt.exceptions.CommandResultError as exc:
stderr = exc.stderr.decode('utf-8').strip()
dbt.exceptions.bad_package_spec(repo, branch, stderr)
dbt.exceptions.bad_package_spec(repo, revision, stderr)
def get_current_sha(cwd):
@@ -71,7 +84,7 @@ def remove_remote(cwd):
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
branch=None):
revision=None):
exists = None
try:
_, err = clone(repo, cwd, dirname=dirname,
@@ -97,7 +110,7 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
logger.debug('Pulling new dependency {}.', directory)
full_path = os.path.join(cwd, directory)
start_sha = get_current_sha(full_path)
checkout(full_path, repo, branch)
checkout(full_path, repo, revision)
end_sha = get_current_sha(full_path)
if exists:
if start_sha == end_sha:
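
For context, the new revision handling above keys off a full-length SHA-1 match. A minimal standalone sketch of that check, with illustrative inputs (the branch and tag names below are made up):

```python
import re


def _is_commit(revision: str) -> bool:
    # Mirrors the check added above: only a full 40-character SHA-1
    # is treated as a pinned commit; everything else is handled as a
    # branch or tag name.
    return bool(re.match(r"\b[0-9a-f]{40}\b", revision))


print(_is_commit("6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed"))  # True  -> fetched by commit
print(_is_commit("0.19.latest"))                               # False -> treated as a branch
print(_is_commit("v0.19.1"))                                   # False -> treated as a tag
print(_is_commit("6c6649f"))                                   # False -> short SHAs fall through
```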

View File

@@ -21,7 +21,7 @@ import jinja2.sandbox
from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
deep_map
get_test_macro_name, deep_map
)
from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
@@ -231,6 +231,7 @@ class BaseMacroGenerator:
template = self.get_template()
# make the module. previously we set both vars and local, but that's
# redundant: They both end up in the same place
# make_module is in jinja2.environment. It returns a TemplateModule
module = template.make_module(vars=self.context, shared=False)
macro = module.__dict__[get_dbt_macro_name(name)]
module.__dict__.update(self.context)
@@ -244,6 +245,7 @@ class BaseMacroGenerator:
raise_compiler_error(str(e))
def call_macro(self, *args, **kwargs):
# called from __call__ methods
if self.context is None:
raise InternalException(
'Context is still None in call_macro!'
@@ -306,8 +308,10 @@ class MacroGenerator(BaseMacroGenerator):
e.stack.append(self.macro)
raise e
# This adds the macro's unique id to the node's 'depends_on'
@contextmanager
def track_call(self):
# This is only called from __call__
if self.stack is None or self.node is None:
yield
else:
@@ -322,6 +326,7 @@ class MacroGenerator(BaseMacroGenerator):
finally:
self.stack.pop(unique_id)
# this makes MacroGenerator objects callable like functions
def __call__(self, *args, **kwargs):
with self.track_call():
return self.call_macro(*args, **kwargs)
@@ -403,6 +408,21 @@ class DocumentationExtension(jinja2.ext.Extension):
return node
class TestExtension(jinja2.ext.Extension):
tags = ['test']
def parse(self, parser):
node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
test_name = parser.parse_assign_target(name_only=True).name
parser.parse_signature(node)
node.defaults = []
node.name = get_test_macro_name(test_name)
node.body = parser.parse_statements(('name:endtest',),
drop_needle=True)
return node
def _is_dunder_name(name):
return name.startswith('__') and name.endswith('__')
@@ -474,6 +494,7 @@ def get_environment(
args['extensions'].append(MaterializationExtension)
args['extensions'].append(DocumentationExtension)
args['extensions'].append(TestExtension)
env_cls: Type[jinja2.Environment]
text_filter: Type
@@ -642,3 +663,39 @@ def add_rendered_test_kwargs(
kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
context[SCHEMA_TEST_KWARGS_NAME] = kwargs
def statically_extract_macro_calls(string, ctx):
# set 'capture_macros' to capture undefined
env = get_environment(None, capture_macros=True)
parsed = env.parse(string)
standard_calls = {
'source': [],
'ref': [],
'config': [],
}
possible_macro_calls = []
for func_call in parsed.find_all(jinja2.nodes.Call):
if hasattr(func_call, 'node') and hasattr(func_call.node, 'name'):
func_name = func_call.node.name
else:
# This is a kludge to capture an adapter.dispatch('<macro_name>') call.
# Call(node=Getattr(
# node=Name(name='adapter', ctx='load'), attr='dispatch', ctx='load'),
# args=[Const(value='get_snapshot_unique_id')], kwargs=[],
# dyn_args=None, dyn_kwargs=None)
if (hasattr(func_call, 'node') and hasattr(func_call.node, 'attr') and
func_call.node.attr == 'dispatch'):
func_name = func_call.args[0].value
else:
continue
if func_name in standard_calls:
continue
elif ctx.get(func_name):
continue
else:
possible_macro_calls.append(func_name)
return possible_macro_calls
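
As a rough illustration of what this static extraction does, here is a self-contained sketch using plain jinja2. dbt's real implementation uses its own environment and context and filters standard calls; the template string below is invented:

```python
import jinja2

env = jinja2.Environment()
source = "select * from {{ ref('orders') }} where {{ is_recent('created_at') }}"

# Parse without rendering, then walk the AST for Call nodes,
# collecting the names of functions/macros that get called.
ast = env.parse(source)
called = []
for call in ast.find_all(jinja2.nodes.Call):
    node = call.node
    if hasattr(node, "name"):                        # ref(...), is_recent(...)
        called.append(node.name)
    elif getattr(node, "attr", None) == "dispatch":  # adapter.dispatch('some_macro')
        called.append(call.args[0].value)

print(called)  # ['ref', 'is_recent'] -- 'ref' would then be filtered as a standard call
```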

View File

@@ -438,7 +438,9 @@ def run_cmd(
return out, err
def download(url: str, path: str, timeout: Union[float, tuple] = None) -> None:
def download(
url: str, path: str, timeout: Optional[Union[float, tuple]] = None
) -> None:
path = convert_path(path)
connection_timeout = timeout or float(os.getenv('DBT_HTTP_TIMEOUT', 10))
response = requests.get(url, timeout=connection_timeout)

View File

@@ -1,16 +1,19 @@
from typing import Any
import dbt.exceptions
import yaml
import yaml.scanner
# the C version is faster, but it doesn't always exist
YamlLoader: Any
try:
from yaml import CSafeLoader as YamlLoader
from yaml import (
CLoader as Loader,
CSafeLoader as SafeLoader,
CDumper as Dumper
)
except ImportError:
from yaml import SafeLoader as YamlLoader
from yaml import ( # type: ignore # noqa: F401
Loader, SafeLoader, Dumper
)
YAML_ERROR_MESSAGE = """
@@ -54,7 +57,7 @@ def contextualized_yaml_error(raw_contents, error):
def safe_load(contents):
return yaml.load(contents, Loader=YamlLoader)
return yaml.load(contents, Loader=SafeLoader)
def load_yaml_text(contents):

View File

@@ -12,7 +12,6 @@ from dbt.clients.system import make_directory
from dbt.context.providers import generate_runtime_model
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.compiled import (
CompiledDataTestNode,
CompiledSchemaTestNode,
COMPILED_TYPES,
GraphMemberNode,
@@ -30,6 +29,7 @@ from dbt.graph import Graph
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.utils import pluralize
import dbt.tracking
graph_file_name = 'graph.gpickle'
@@ -52,11 +52,17 @@ def print_compile_stats(stats):
NodeType.Operation: 'operation',
NodeType.Seed: 'seed file',
NodeType.Source: 'source',
NodeType.Exposure: 'exposure',
}
results = {k: 0 for k in names.keys()}
results.update(stats)
# create tracking event for resource_counts
if dbt.tracking.active_user is not None:
resource_counts = {k.pluralize(): v for k, v in results.items()}
dbt.tracking.track_resource_counts(resource_counts)
stat_line = ", ".join([
pluralize(ct, names.get(t)) for t, ct in results.items()
if t in names
@@ -81,6 +87,8 @@ def _generate_stats(manifest: Manifest):
for source in manifest.sources.values():
stats[source.resource_type] += 1
for exposure in manifest.exposures.values():
stats[exposure.resource_type] += 1
for macro in manifest.macros.values():
stats[macro.resource_type] += 1
return stats
@@ -135,7 +143,7 @@ class Linker:
"""
out_graph = self.graph.copy()
for node_id in self.graph.nodes():
data = manifest.expect(node_id).to_dict()
data = manifest.expect(node_id).to_dict(omit_none=True)
out_graph.add_node(node_id, **data)
nx.write_gpickle(out_graph, outfile)
@@ -188,11 +196,11 @@ class Compiler:
[
InjectedCTE(
id="cte_id_1",
sql="__dbt__CTE__ephemeral as (select * from table)",
sql="__dbt__cte__ephemeral as (select * from table)",
),
InjectedCTE(
id="cte_id_2",
sql="__dbt__CTE__events as (select id, type from events)",
sql="__dbt__cte__events as (select id, type from events)",
),
]
@@ -203,8 +211,8 @@ class Compiler:
This will spit out:
"with __dbt__CTE__ephemeral as (select * from table),
__dbt__CTE__events as (select id, type from events),
"with __dbt__cte__ephemeral as (select * from table),
__dbt__cte__events as (select id, type from events),
with internal_cte as (select * from sessions)
select * from internal_cte"
@@ -242,9 +250,6 @@ class Compiler:
return str(parsed)
def _get_dbt_test_name(self) -> str:
return 'dbt__CTE__INTERNAL_test'
# This method is called by the 'compile_node' method. Starting
# from the node that it is passed in, it will recursively call
# itself using the 'extra_ctes'. The 'ephemeral' models do
@@ -275,55 +280,49 @@ class Compiler:
# gathered and then "injected" into the model.
prepended_ctes: List[InjectedCTE] = []
dbt_test_name = self._get_dbt_test_name()
# extra_ctes are added to the model by
# RuntimeRefResolver.create_relation, which adds an
# extra_cte for every model relation which is an
# ephemeral model.
for cte in model.extra_ctes:
if cte.id == dbt_test_name:
sql = cte.sql
if cte.id not in manifest.nodes:
raise InternalException(
f'During compilation, found a cte reference that '
f'could not be resolved: {cte.id}'
)
cte_model = manifest.nodes[cte.id]
if not cte_model.is_ephemeral_model:
raise InternalException(f'{cte.id} is not ephemeral')
# This model has already been compiled, so it's been
# through here before
if getattr(cte_model, 'compiled', False):
assert isinstance(cte_model, tuple(COMPILED_TYPES.values()))
cte_model = cast(NonSourceCompiledNode, cte_model)
new_prepended_ctes = cte_model.extra_ctes
# if the cte_model isn't compiled, i.e. first time here
else:
if cte.id not in manifest.nodes:
raise InternalException(
f'During compilation, found a cte reference that '
f'could not be resolved: {cte.id}'
# This is an ephemeral parsed model that we can compile.
# Compile and update the node
cte_model = self._compile_node(
cte_model, manifest, extra_context)
# recursively call this method
cte_model, new_prepended_ctes = \
self._recursively_prepend_ctes(
cte_model, manifest, extra_context
)
cte_model = manifest.nodes[cte.id]
# Save compiled SQL file and sync manifest
self._write_node(cte_model)
manifest.sync_update_node(cte_model)
if not cte_model.is_ephemeral_model:
raise InternalException(f'{cte.id} is not ephemeral')
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
# This model has already been compiled, so it's been
# through here before
if getattr(cte_model, 'compiled', False):
assert isinstance(cte_model,
tuple(COMPILED_TYPES.values()))
cte_model = cast(NonSourceCompiledNode, cte_model)
new_prepended_ctes = cte_model.extra_ctes
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
sql = f' {new_cte_name} as (\n{cte_model.compiled_sql}\n)'
# if the cte_model isn't compiled, i.e. first time here
else:
# This is an ephemeral parsed model that we can compile.
# Compile and update the node
cte_model = self._compile_node(
cte_model, manifest, extra_context)
# recursively call this method
cte_model, new_prepended_ctes = \
self._recursively_prepend_ctes(
cte_model, manifest, extra_context
)
# Save compiled SQL file and sync manifest
self._write_node(cte_model)
manifest.sync_update_node(cte_model)
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
sql = f' {new_cte_name} as (\n{cte_model.compiled_sql}\n)'
_add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
_add_prepended_cte(prepended_ctes, InjectedCTE(id=cte.id, sql=sql))
# We don't save injected_sql into compiled sql for ephemeral models
# because it will cause problems with processing of subsequent models.
@@ -336,40 +335,12 @@ class Compiler:
model.compiled_sql = injected_sql
model.extra_ctes_injected = True
model.extra_ctes = prepended_ctes
model.validate(model.to_dict())
model.validate(model.to_dict(omit_none=True))
manifest.update_node(model)
return model, prepended_ctes
def _add_ctes(
self,
compiled_node: NonSourceCompiledNode,
manifest: Manifest,
extra_context: Dict[str, Any],
) -> NonSourceCompiledNode:
"""Insert the CTEs for the model."""
# for data tests, we need to insert a special CTE at the end of the
# list containing the test query, and then have the "real" query be a
# select count(*) from that model.
# the benefit of doing it this way is that _insert_ctes() can be
# rewritten for different adapters to handle databases that don't
# support CTEs, or at least don't have full support.
if isinstance(compiled_node, CompiledDataTestNode):
# the last prepend (so last in order) should be the data test body.
# then we can add our select count(*) from _that_ cte as the "real"
# compiled_sql, and do the regular prepend logic from CTEs.
name = self._get_dbt_test_name()
cte = InjectedCTE(
id=name,
sql=f' {name} as (\n{compiled_node.compiled_sql}\n)'
)
compiled_node.extra_ctes.append(cte)
compiled_node.compiled_sql = f'\nselect count(*) from {name}'
return compiled_node
# creates a compiled_node from the ManifestNode passed in,
# creates a "context" dictionary for jinja rendering,
# and then renders the "compiled_sql" using the node, the
@@ -385,7 +356,7 @@ class Compiler:
logger.debug("Compiling {}".format(node.unique_id))
data = node.to_dict()
data = node.to_dict(omit_none=True)
data.update({
'compiled': False,
'compiled_sql': None,
@@ -408,12 +379,6 @@ class Compiler:
compiled_node.compiled = True
# add ctes for specific test nodes, and also for
# possible future use in adapters
compiled_node = self._add_ctes(
compiled_node, manifest, extra_context
)
return compiled_node
def write_graph_file(self, linker: Linker, manifest: Manifest):
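
A toy sketch of the CTE injection described in the docstring above: ephemeral models are compiled into named CTEs (now prefixed `__dbt__cte__`) and prepended to the referencing model's SQL. This is illustrative only and ignores models whose SQL already begins with its own `with` clause:

```python
from typing import List, Tuple


def inject_ctes(model_sql: str, prepended: List[Tuple[str, str]]) -> str:
    # prepended is a list of (cte_name, cte_body) pairs, in dependency order
    if not prepended:
        return model_sql
    ctes = ",\n".join(f"{name} as (\n{body}\n)" for name, body in prepended)
    return f"with {ctes}\n{model_sql}"


print(inject_ctes(
    "select id, type from __dbt__cte__events",
    [("__dbt__cte__events", "select id, type from events")],
))
```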

View File

@@ -2,7 +2,7 @@ from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple
import os
from hologram import ValidationError
from dbt.dataclass_schema import ValidationError
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
@@ -75,6 +75,7 @@ def read_user_config(directory: str) -> UserConfig:
if profile:
user_cfg = coerce_dict_str(profile.get('config', {}))
if user_cfg is not None:
UserConfig.validate(user_cfg)
return UserConfig.from_dict(user_cfg)
except (RuntimeException, ValidationError):
pass
@@ -110,8 +111,8 @@ class Profile(HasCredentials):
'credentials': self.credentials,
}
if serialize_credentials:
result['config'] = self.config.to_dict()
result['credentials'] = self.credentials.to_dict()
result['config'] = self.config.to_dict(omit_none=True)
result['credentials'] = self.credentials.to_dict(omit_none=True)
return result
def to_target_dict(self) -> Dict[str, Any]:
@@ -124,7 +125,7 @@ class Profile(HasCredentials):
'name': self.target_name,
'target_name': self.target_name,
'profile_name': self.profile_name,
'config': self.config.to_dict(),
'config': self.config.to_dict(omit_none=True),
})
return target
@@ -137,10 +138,10 @@ class Profile(HasCredentials):
def validate(self):
try:
if self.credentials:
self.credentials.to_dict(validate=True)
ProfileConfig.from_dict(
self.to_profile_info(serialize_credentials=True)
)
dct = self.credentials.to_dict(omit_none=True)
self.credentials.validate(dct)
dct = self.to_profile_info(serialize_credentials=True)
ProfileConfig.validate(dct)
except ValidationError as exc:
raise DbtProfileError(validator_error_message(exc)) from exc
@@ -160,7 +161,9 @@ class Profile(HasCredentials):
typename = profile.pop('type')
try:
cls = load_plugin(typename)
credentials = cls.from_dict(profile)
data = cls.translate_aliases(profile)
cls.validate(data)
credentials = cls.from_dict(data)
except (RuntimeException, ValidationError) as e:
msg = str(e) if isinstance(e, RuntimeException) else e.message
raise DbtProfileError(
@@ -233,6 +236,7 @@ class Profile(HasCredentials):
"""
if user_cfg is None:
user_cfg = {}
UserConfig.validate(user_cfg)
config = UserConfig.from_dict(user_cfg)
profile = cls(

View File

@@ -26,15 +26,12 @@ from dbt.version import get_installed_version
from dbt.utils import MultiDict
from dbt.node_types import NodeType
from dbt.config.selectors import SelectorDict
from dbt.contracts.project import (
Project as ProjectContract,
SemverString,
)
from dbt.contracts.project import PackageConfig
from hologram import ValidationError
from dbt.dataclass_schema import ValidationError
from .renderer import DbtProjectYamlRenderer
from .selectors import (
selector_config_from_data,
@@ -101,6 +98,7 @@ def package_config_from_data(packages_data: Dict[str, Any]):
packages_data = {'packages': []}
try:
PackageConfig.validate(packages_data)
packages = PackageConfig.from_dict(packages_data)
except ValidationError as e:
raise DbtProjectError(
@@ -306,7 +304,10 @@ class PartialProject(RenderComponents):
)
try:
cfg = ProjectContract.from_dict(rendered.project_dict)
ProjectContract.validate(rendered.project_dict)
cfg = ProjectContract.from_dict(
rendered.project_dict
)
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e
# name/version are required in the Project definition, so we can assume
@@ -346,18 +347,20 @@ class PartialProject(RenderComponents):
# break many things
quoting: Dict[str, Any] = {}
if cfg.quoting is not None:
quoting = cfg.quoting.to_dict()
quoting = cfg.quoting.to_dict(omit_none=True)
models: Dict[str, Any]
seeds: Dict[str, Any]
snapshots: Dict[str, Any]
sources: Dict[str, Any]
tests: Dict[str, Any]
vars_value: VarProvider
models = cfg.models
seeds = cfg.seeds
snapshots = cfg.snapshots
sources = cfg.sources
tests = cfg.tests
if cfg.vars is None:
vars_dict: Dict[str, Any] = {}
else:
@@ -407,6 +410,7 @@ class PartialProject(RenderComponents):
selectors=selectors,
query_comment=query_comment,
sources=sources,
tests=tests,
vars=vars_value,
config_version=cfg.config_version,
unrendered=unrendered,
@@ -512,6 +516,7 @@ class Project:
seeds: Dict[str, Any]
snapshots: Dict[str, Any]
sources: Dict[str, Any]
tests: Dict[str, Any]
vars: VarProvider
dbt_version: List[VersionSpecifier]
packages: Dict[str, Any]
@@ -570,6 +575,7 @@ class Project:
'seeds': self.seeds,
'snapshots': self.snapshots,
'sources': self.sources,
'tests': self.tests,
'vars': self.vars.to_dict(),
'require-dbt-version': [
v.to_version_string() for v in self.dbt_version
@@ -577,16 +583,17 @@ class Project:
'config-version': self.config_version,
})
if self.query_comment:
result['query-comment'] = self.query_comment.to_dict()
result['query-comment'] = \
self.query_comment.to_dict(omit_none=True)
if with_packages:
result.update(self.packages.to_dict())
result.update(self.packages.to_dict(omit_none=True))
return result
def validate(self):
try:
ProjectContract.from_dict(self.to_project_config())
ProjectContract.validate(self.to_project_config())
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e

View File

@@ -145,7 +145,7 @@ class DbtProjectYamlRenderer(BaseRenderer):
if first == 'vars':
return False
if first in {'seeds', 'models', 'snapshots', 'seeds'}:
if first in {'seeds', 'models', 'snapshots', 'tests'}:
keypath_parts = {
(k.lstrip('+') if isinstance(k, str) else k)
for k in keypath

View File

@@ -33,7 +33,7 @@ from dbt.exceptions import (
raise_compiler_error
)
from hologram import ValidationError
from dbt.dataclass_schema import ValidationError
def _project_quoting_dict(
@@ -78,7 +78,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
get_relation_class_by_name(profile.credentials.type)
.get_default_quote_policy()
.replace_dict(_project_quoting_dict(project, profile))
).to_dict()
).to_dict(omit_none=True)
cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}'))
@@ -110,6 +110,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
selectors=project.selectors,
query_comment=project.query_comment,
sources=project.sources,
tests=project.tests,
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
@@ -174,7 +175,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
:raises DbtProjectError: If the configuration fails validation.
"""
try:
Configuration.from_dict(self.serialize())
Configuration.validate(self.serialize())
except ValidationError as e:
raise DbtProjectError(validator_error_message(e)) from e
@@ -272,7 +273,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
return frozenset(paths)
def get_resource_config_paths(self) -> Dict[str, PathSet]:
"""Return a dictionary with 'seeds' and 'models' keys whose values are
"""Return a dictionary with resource type keys whose values are
lists of lists of strings, where each inner list of strings represents
a configured path in the resource.
"""
@@ -281,6 +282,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
'seeds': self._get_config_paths(self.seeds),
'snapshots': self._get_config_paths(self.snapshots),
'sources': self._get_config_paths(self.sources),
'tests': self._get_config_paths(self.tests),
}
def get_unused_resource_config_paths(
@@ -391,7 +393,7 @@ class UnsetConfig(UserConfig):
f"'UnsetConfig' object has no attribute {name}"
)
def to_dict(self):
def __post_serialize__(self, dct):
return {}
@@ -488,6 +490,7 @@ class UnsetProfileConfig(RuntimeConfig):
selectors=project.selectors,
query_comment=project.query_comment,
sources=project.sources,
tests=project.tests,
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,

View File

@@ -1,8 +1,9 @@
from pathlib import Path
from typing import Dict, Any
import yaml
from hologram import ValidationError
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, Loader, Dumper, load_yaml_text
)
from dbt.dataclass_schema import ValidationError
from .renderer import SelectorRenderer
@@ -11,7 +12,6 @@ from dbt.clients.system import (
path_exists,
resolve_path_from_base,
)
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.selection import SelectorFile
from dbt.exceptions import DbtSelectorsError, RuntimeException
from dbt.graph import parse_from_selectors_definition, SelectionSpec
@@ -30,9 +30,11 @@ Validator Error:
class SelectorConfig(Dict[str, SelectionSpec]):
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
def selectors_from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
try:
SelectorFile.validate(data)
selector_file = SelectorFile.from_dict(data)
selectors = parse_from_selectors_definition(selector_file)
except ValidationError as exc:
@@ -66,7 +68,7 @@ class SelectorConfig(Dict[str, SelectionSpec]):
f'Could not render selector data: {exc}',
result_type='invalid_selector',
) from exc
return cls.from_dict(rendered)
return cls.selectors_from_dict(rendered)
@classmethod
def from_path(
@@ -107,7 +109,7 @@ def selector_config_from_data(
selectors_data = {'selectors': []}
try:
selectors = SelectorConfig.from_dict(selectors_data)
selectors = SelectorConfig.selectors_from_dict(selectors_data)
except ValidationError as e:
raise DbtSelectorsError(
MALFORMED_SELECTOR_ERROR.format(error=str(e.message)),

View File

@@ -7,13 +7,14 @@ from typing import (
from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients import yaml_helper
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.version import __version__ as dbt_version
import yaml
# These modules are added to the context. Consider alternative
# approaches which will extend well to potentially many modules
import pytz
@@ -172,6 +173,7 @@ class BaseContext(metaclass=ContextMeta):
builtins[key] = value
return builtins
# no dbtClassMixin so this is not an actual override
def to_dict(self):
self._ctx['context'] = self._ctx
builtins = self.generate_builtins()
@@ -394,7 +396,7 @@ class BaseContext(metaclass=ContextMeta):
-- ["good"]
"""
try:
return yaml_helper.safe_load(value)
return safe_load(value)
except (AttributeError, ValueError, yaml.YAMLError):
return default
@@ -536,4 +538,5 @@ class BaseContext(metaclass=ContextMeta):
def generate_base_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
ctx = BaseContext(cli_vars)
# This is not a Mashumaro to_dict call
return ctx.to_dict()

View File

@@ -41,6 +41,8 @@ class UnrenderedConfig(ConfigSource):
model_configs = unrendered.get('snapshots')
elif resource_type == NodeType.Source:
model_configs = unrendered.get('sources')
elif resource_type == NodeType.Test:
model_configs = unrendered.get('tests')
else:
model_configs = unrendered.get('models')
@@ -61,6 +63,8 @@ class RenderedConfig(ConfigSource):
model_configs = self.project.snapshots
elif resource_type == NodeType.Source:
model_configs = self.project.sources
elif resource_type == NodeType.Test:
model_configs = self.project.tests
else:
model_configs = self.project.models
return model_configs
@@ -165,7 +169,7 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
# Calculate the defaults. We don't want to validate the defaults,
# because it might be invalid in the case of required config members
# (such as on snapshots!)
result = config_cls.from_dict({}, validate=False)
result = config_cls.from_dict({})
return result
def _update_from_config(
@@ -196,7 +200,7 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
base=base,
)
finalized = config.finalize_and_validate()
return finalized.to_dict()
return finalized.to_dict(omit_none=True)
class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]):

View File

@@ -77,4 +77,5 @@ def generate_runtime_docs(
current_project: str,
) -> Dict[str, Any]:
ctx = DocsRuntimeContext(config, target, manifest, current_project)
# This is not a Mashumaro to_dict call
return ctx.to_dict()

View File

@@ -0,0 +1,179 @@
from typing import (
Dict, MutableMapping, Optional
)
from dbt.contracts.graph.parsed import ParsedMacro
from dbt.exceptions import raise_duplicate_macro_name, raise_compiler_error
from dbt.include.global_project import PROJECT_NAME as GLOBAL_PROJECT_NAME
from dbt.clients.jinja import MacroGenerator
MacroNamespace = Dict[str, ParsedMacro]
# This class builds the MacroResolver by adding macros
# to various categories for finding macros in the right order,
# so that higher precedence macros are found first.
# This functionality is also provided by the MacroNamespace,
# but the intention is to eventually replace that class.
# This enables us to get the macro unique_id without
# processing every macro in the project.
# Note: the root project macros override everything in the
# dbt internal projects. External projects (dependencies) will
# use their own macros first, then pull from the root project
# followed by dbt internal projects.
class MacroResolver:
def __init__(
self,
macros: MutableMapping[str, ParsedMacro],
root_project_name: str,
internal_package_names,
) -> None:
self.root_project_name = root_project_name
self.macros = macros
# internal packages comes from get_adapter_package_names
self.internal_package_names = internal_package_names
# To be filled in from macros.
self.internal_packages: Dict[str, MacroNamespace] = {}
self.packages: Dict[str, MacroNamespace] = {}
self.root_package_macros: MacroNamespace = {}
# add the macros to internal_packages, packages, and root packages
self.add_macros()
self._build_internal_packages_namespace()
self._build_macros_by_name()
def _build_internal_packages_namespace(self):
# Iterate in reverse-order and overwrite: the packages that are first
# in the list are the ones we want to "win".
self.internal_packages_namespace: MacroNamespace = {}
for pkg in reversed(self.internal_package_names):
if pkg in self.internal_packages:
# Turn the internal packages into a flat namespace
self.internal_packages_namespace.update(
self.internal_packages[pkg])
# search order:
# local_namespace (package of particular node), not including
# the internal packages or the root package
# This means that within an extra package, it uses its own macros
# root package namespace
# non-internal packages (that aren't local or root)
# dbt internal packages
def _build_macros_by_name(self):
macros_by_name = {}
# all internal packages (already in the right order)
for macro in self.internal_packages_namespace.values():
macros_by_name[macro.name] = macro
# non-internal packages
for fnamespace in self.packages.values():
for macro in fnamespace.values():
macros_by_name[macro.name] = macro
# root package macros
for macro in self.root_package_macros.values():
macros_by_name[macro.name] = macro
self.macros_by_name = macros_by_name
def _add_macro_to(
self,
package_namespaces: Dict[str, MacroNamespace],
macro: ParsedMacro,
):
if macro.package_name in package_namespaces:
namespace = package_namespaces[macro.package_name]
else:
namespace = {}
package_namespaces[macro.package_name] = namespace
if macro.name in namespace:
raise_duplicate_macro_name(
macro, macro, macro.package_name
)
package_namespaces[macro.package_name][macro.name] = macro
def add_macro(self, macro: ParsedMacro):
macro_name: str = macro.name
# internal macros (from plugins) will be processed separately from
# project macros, so store them in a different place
if macro.package_name in self.internal_package_names:
self._add_macro_to(self.internal_packages, macro)
else:
# if it's not an internal package
self._add_macro_to(self.packages, macro)
# add to root_package_macros if it's in the root package
if macro.package_name == self.root_project_name:
self.root_package_macros[macro_name] = macro
def add_macros(self):
for macro in self.macros.values():
self.add_macro(macro)
def get_macro(self, local_package, macro_name):
local_package_macros = {}
if (local_package not in self.internal_package_names and
local_package in self.packages):
local_package_macros = self.packages[local_package]
# First: search the local packages for this macro
if macro_name in local_package_macros:
return local_package_macros[macro_name]
# Now look up in the standard search order
if macro_name in self.macros_by_name:
return self.macros_by_name[macro_name]
return None
def get_macro_id(self, local_package, macro_name):
macro = self.get_macro(local_package, macro_name)
if macro is None:
return None
else:
return macro.unique_id
# Currently this is just used by test processing in the schema
# parser (in connection with the MacroResolver). Future work
# will extend the use of these classes to other parsing areas.
# One of the features of this class compared to the MacroNamespace
# is that you can limit the number of macros provided to the
# context dictionary in the 'to_dict' manifest method.
class TestMacroNamespace:
def __init__(
self, macro_resolver, ctx, node, thread_ctx, depends_on_macros
):
self.macro_resolver = macro_resolver
self.ctx = ctx
self.node = node
self.thread_ctx = thread_ctx
local_namespace = {}
if depends_on_macros:
for macro_unique_id in depends_on_macros:
if macro_unique_id in self.macro_resolver.macros:
macro = self.macro_resolver.macros[macro_unique_id]
local_namespace[macro.name] = MacroGenerator(
macro, self.ctx, self.node, self.thread_ctx,
)
self.local_namespace = local_namespace
def get_from_package(
self, package_name: Optional[str], name: str
) -> Optional[MacroGenerator]:
macro = None
if package_name is None:
macro = self.macro_resolver.macros_by_name.get(name)
elif package_name == GLOBAL_PROJECT_NAME:
macro = self.macro_resolver.internal_packages_namespace.get(name)
elif package_name in self.macro_resolver.packages:
macro = self.macro_resolver.packages[package_name].get(name)
else:
raise_compiler_error(
f"Could not find package '{package_name}'"
)
if not macro:
return None
macro_func = MacroGenerator(
macro, self.ctx, self.node, self.thread_ctx
)
return macro_func
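
The precedence rule encoded in `_build_macros_by_name` can be summarized with a toy example: namespaces are applied from lowest to highest precedence, so later dictionary updates win (the package and macro names below are made up for illustration):

```python
# lowest precedence: dbt's internal packages (already flattened in order)
internal = {"generate_schema_name": "internal default"}
# next: non-internal packages (dependencies)
packages = {"generate_schema_name": "dbt_utils version", "star": "dbt_utils version"}
# highest precedence: the root project's own macros
root_project = {"generate_schema_name": "root project override"}

macros_by_name = {}
for namespace in (internal, packages, root_project):
    macros_by_name.update(namespace)

print(macros_by_name["generate_schema_name"])  # root project override
print(macros_by_name["star"])                  # dbt_utils version
```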

View File

@@ -15,13 +15,21 @@ NamespaceMember = Union[FlatNamespace, MacroGenerator]
FullNamespace = Dict[str, NamespaceMember]
# The point of this class is to collect the various macros
# and provide the ability to flatten them into the ManifestContexts
# that are created for jinja, so that macro calls can be resolved.
# Creates special iterators and _keys methods to flatten the lists.
# When this class is created it has a static 'local_namespace' which
# depends on the package of the node, so it only works for one
# particular local package at a time for "flattening" into a context.
# 'get_by_package' should work for any macro.
class MacroNamespace(Mapping):
def __init__(
self,
global_namespace: FlatNamespace,
local_namespace: FlatNamespace,
global_project_namespace: FlatNamespace,
packages: Dict[str, FlatNamespace],
global_namespace: FlatNamespace, # root package macros
local_namespace: FlatNamespace, # packages for *this* node
global_project_namespace: FlatNamespace, # internal packages
packages: Dict[str, FlatNamespace], # non-internal packages
):
self.global_namespace: FlatNamespace = global_namespace
self.local_namespace: FlatNamespace = local_namespace
@@ -29,20 +37,24 @@ class MacroNamespace(Mapping):
self.global_project_namespace: FlatNamespace = global_project_namespace
def _search_order(self) -> Iterable[Union[FullNamespace, FlatNamespace]]:
yield self.local_namespace
yield self.global_namespace
yield self.packages
yield self.local_namespace # local package
yield self.global_namespace # root package
yield self.packages # non-internal packages
yield {
GLOBAL_PROJECT_NAME: self.global_project_namespace,
GLOBAL_PROJECT_NAME: self.global_project_namespace, # dbt
}
yield self.global_project_namespace
yield self.global_project_namespace # other internal projects besides dbt
# provides special keys method for MacroNamespace iterator
# returns keys from local_namespace, global_namespace, packages,
# global_project_namespace
def _keys(self) -> Set[str]:
keys: Set[str] = set()
for search in self._search_order():
keys.update(search)
return keys
# special iterator using special keys
def __iter__(self) -> Iterator[str]:
for key in self._keys():
yield key
@@ -72,6 +84,10 @@ class MacroNamespace(Mapping):
)
# This class builds the MacroNamespace by adding macros to
# internal_packages or packages, and locals/globals.
# Call 'build_namespace' to return a MacroNamespace.
# This is used by ManifestContext (and subclasses)
class MacroNamespaceBuilder:
def __init__(
self,
@@ -83,10 +99,17 @@ class MacroNamespaceBuilder:
) -> None:
self.root_package = root_package
self.search_package = search_package
# internal packages comes from get_adapter_package_names
self.internal_package_names = set(internal_packages)
self.internal_package_names_order = internal_packages
# macro_func is added here if in root package, since
# the root package acts as a "global" namespace, overriding
# everything else except local external package macro calls
self.globals: FlatNamespace = {}
# macro_func is added here if it's the package for this node
self.locals: FlatNamespace = {}
# Create a dictionary of [package name][macro name] =
# MacroGenerator object which acts like a function
self.internal_packages: Dict[str, FlatNamespace] = {}
self.packages: Dict[str, FlatNamespace] = {}
self.thread_ctx = thread_ctx
@@ -94,25 +117,28 @@ class MacroNamespaceBuilder:
def _add_macro_to(
self,
heirarchy: Dict[str, FlatNamespace],
hierarchy: Dict[str, FlatNamespace],
macro: ParsedMacro,
macro_func: MacroGenerator,
):
if macro.package_name in heirarchy:
namespace = heirarchy[macro.package_name]
if macro.package_name in hierarchy:
namespace = hierarchy[macro.package_name]
else:
namespace = {}
heirarchy[macro.package_name] = namespace
hierarchy[macro.package_name] = namespace
if macro.name in namespace:
raise_duplicate_macro_name(
macro_func.macro, macro, macro.package_name
)
heirarchy[macro.package_name][macro.name] = macro_func
hierarchy[macro.package_name][macro.name] = macro_func
def add_macro(self, macro: ParsedMacro, ctx: Dict[str, Any]):
macro_name: str = macro.name
# MacroGenerator is defined in clients/jinja.py; it wraps the macro in
# a callable object, so calling it executes the macro via
# MacroGenerator.__call__
macro_func: MacroGenerator = MacroGenerator(
macro, ctx, self.node, self.thread_ctx
)
@@ -122,10 +148,12 @@ class MacroNamespaceBuilder:
if macro.package_name in self.internal_package_names:
self._add_macro_to(self.internal_packages, macro, macro_func)
else:
# if it's not an internal package
self._add_macro_to(self.packages, macro, macro_func)
# add to locals if it's the package this node is in
if macro.package_name == self.search_package:
self.locals[macro_name] = macro_func
# add to globals if it's in the root package
elif macro.package_name == self.root_package:
self.globals[macro_name] = macro_func
@@ -143,11 +171,12 @@ class MacroNamespaceBuilder:
global_project_namespace: FlatNamespace = {}
for pkg in reversed(self.internal_package_names_order):
if pkg in self.internal_packages:
# add the macros pointed to by this package name
global_project_namespace.update(self.internal_packages[pkg])
return MacroNamespace(
global_namespace=self.globals,
local_namespace=self.locals,
global_project_namespace=global_project_namespace,
packages=self.packages,
global_namespace=self.globals, # root package macros
local_namespace=self.locals, # packages for *this* node
global_project_namespace=global_project_namespace, # internal packages
packages=self.packages, # non internal_packages
)
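To make the resolution order above concrete, here is a minimal, self-contained sketch of how a flattened namespace could resolve a macro name; the function and dicts are illustrative stand-ins, not dbt's real MacroNamespace.

# Illustrative only: local package wins, then the root package, then other
# installed packages, then the internal ("dbt") project macros.
def resolve(name, local, root, packages, internal):
    if name in local:
        return local[name]
    if name in root:
        return root[name]
    for pkg in packages.values():
        if name in pkg:
            return pkg[name]
    if name in internal:
        return internal[name]
    raise KeyError(name)

local = {'my_macro': 'local.my_macro'}
root = {'my_macro': 'root.my_macro', 'shared': 'root.shared'}
packages = {'dbt_utils': {'surrogate_key': 'dbt_utils.surrogate_key'}}
internal = {'shared': 'dbt.shared'}

assert resolve('my_macro', local, root, packages, internal) == 'local.my_macro'
assert resolve('surrogate_key', local, root, packages, internal) == 'dbt_utils.surrogate_key'
assert resolve('shared', local, root, packages, internal) == 'root.shared'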

View File

@@ -3,6 +3,7 @@ from typing import List
from dbt.clients.jinja import MacroStack
from dbt.contracts.connection import AdapterRequiredConfig
from dbt.contracts.graph.manifest import Manifest
from dbt.context.macro_resolver import TestMacroNamespace
from .configured import ConfiguredContext
@@ -24,12 +25,20 @@ class ManifestContext(ConfiguredContext):
) -> None:
super().__init__(config)
self.manifest = manifest
# this is the package of the node for which this context was built
self.search_package = search_package
self.macro_stack = MacroStack()
# This namespace is used by the BaseDatabaseWrapper in jinja rendering.
# The namespace is passed to it when it's constructed. It expects
# to be able to do: namespace.get_from_package(..)
self.namespace = self._build_namespace()
def _build_namespace(self):
# this feeds every macro in the manifest through the
# MacroNamespaceBuilder and returns the resulting MacroNamespace,
# which __init__ stores in self.namespace
builder = self._get_namespace_builder()
self.namespace = builder.build_namespace(
self.manifest.macros.values(),
self._ctx,
return builder.build_namespace(
self.manifest.macros.values(), self._ctx
)
def _get_namespace_builder(self) -> MacroNamespaceBuilder:
@@ -46,9 +55,15 @@ class ManifestContext(ConfiguredContext):
None,
)
# This does not use the Mashumaro code
def to_dict(self):
dct = super().to_dict()
dct.update(self.namespace)
# This moves all of the macros in the 'namespace' into top-level
# keys in the context dictionary
if isinstance(self.namespace, TestMacroNamespace):
dct.update(self.namespace.local_namespace)
else:
dct.update(self.namespace)
return dct

View File

@@ -8,16 +8,22 @@ from typing_extensions import Protocol
from dbt import deprecations
from dbt.adapters.base.column import Column
from dbt.adapters.factory import get_adapter, get_adapter_package_names
from dbt.adapters.factory import (
get_adapter, get_adapter_package_names, get_adapter_type_names
)
from dbt.clients import agate_helper
from dbt.clients.jinja import get_rendered, MacroGenerator
from dbt.clients.jinja import get_rendered, MacroGenerator, MacroStack
from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
from dbt.contracts.graph.manifest import Manifest, Disabled
from dbt.contracts.connection import AdapterResponse
from dbt.contracts.graph.manifest import (
Manifest, Disabled
)
from dbt.contracts.graph.compiled import (
CompiledResource,
CompiledSeedNode,
@@ -83,6 +89,7 @@ class BaseDatabaseWrapper:
Wrapper for runtime database interaction. Applies the runtime quote policy
via a relation proxy.
"""
def __init__(self, adapter, namespace: MacroNamespace):
self._adapter = adapter
self.Relation = RelationProxy(adapter)
@@ -102,10 +109,11 @@ class BaseDatabaseWrapper:
return self._adapter.commit_if_has_connection()
def _get_adapter_macro_prefixes(self) -> List[str]:
# a future version of this could have plugins automatically call fall
# back to their dependencies' dependencies by using
# `get_adapter_type_names` instead of `[self.config.credentials.type]`
search_prefixes = [self._adapter.type(), 'default']
# order matters for dispatch:
# 1. current adapter
# 2. any parent adapters (dependencies)
# 3. 'default'
search_prefixes = get_adapter_type_names(self.config.credentials.type) + ['default']
return search_prefixes
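A small sketch of the resulting probe order, assuming a hypothetical adapter chain in which 'redshift' depends on 'postgres'; the real list comes from get_adapter_type_names.

def candidate_macro_names(macro_name, adapter_type_names):
    # current adapter first, then parent adapters, then 'default'
    prefixes = list(adapter_type_names) + ['default']
    return [f'{prefix}__{macro_name}' for prefix in prefixes]

print(candidate_macro_names('current_timestamp', ['redshift', 'postgres']))
# ['redshift__current_timestamp', 'postgres__current_timestamp', 'default__current_timestamp']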
def dispatch(
@@ -139,6 +147,7 @@ class BaseDatabaseWrapper:
for prefix in self._get_adapter_macro_prefixes():
search_name = f'{prefix}__{macro_name}'
try:
# this uses the namespace from the context
macro = self._namespace.get_from_package(
package_name, search_name
)
@@ -379,6 +388,7 @@ class ParseDatabaseWrapper(BaseDatabaseWrapper):
"""The parser subclass of the database wrapper applies any explicit
parse-time overrides.
"""
def __getattr__(self, name):
override = (name in self._adapter._available_ and
name in self._adapter._parse_replacements_)
@@ -399,6 +409,7 @@ class RuntimeDatabaseWrapper(BaseDatabaseWrapper):
"""The runtime database wrapper exposes everything the adapter marks
available.
"""
def __getattr__(self, name):
if name in self._adapter._available_:
return getattr(self._adapter, name)
@@ -634,10 +645,13 @@ class ProviderContext(ManifestContext):
self.context_config: Optional[ContextConfig] = context_config
self.provider: Provider = provider
self.adapter = get_adapter(self.config)
# The macro namespace is used in creating the DatabaseWrapper
self.db_wrapper = self.provider.DatabaseWrapper(
self.adapter, self.namespace
)
# This overrides the method in ManifestContext and supplies a model
# to the builder, which the ManifestContext version does not
def _get_namespace_builder(self):
internal_packages = get_adapter_package_names(
self.config.credentials.type
@@ -660,18 +674,33 @@ class ProviderContext(ManifestContext):
@contextmember
def store_result(
self, name: str, status: Any, agate_table: Optional[agate.Table] = None
self, name: str,
response: Any,
agate_table: Optional[agate.Table] = None
) -> str:
if agate_table is None:
agate_table = agate_helper.empty_table()
self.sql_results[name] = AttrDict({
'status': status,
'response': response,
'data': agate_helper.as_matrix(agate_table),
'table': agate_table
})
return ''
@contextmember
def store_raw_result(
self,
name: str,
message=Optional[str],
code=Optional[str],
rows_affected=Optional[str],
agate_table: Optional[agate.Table] = None
) -> str:
response = AdapterResponse(
_message=message, code=code, rows_affected=rows_affected)
return self.store_result(name, response, agate_table)
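A simplified, self-contained sketch of what store_raw_result does with its arguments; FakeAdapterResponse and the module-level sql_results dict are stand-ins for dbt's AdapterResponse and the context's per-run state.

class FakeAdapterResponse:
    def __init__(self, _message, code=None, rows_affected=None):
        self._message = _message
        self.code = code
        self.rows_affected = rows_affected

    def __str__(self):
        return self._message

sql_results = {}

def store_raw_result(name, message=None, code=None, rows_affected=None, table=None):
    # build a response object from the raw fields, then store it by name
    response = FakeAdapterResponse(_message=message, code=code, rows_affected=rows_affected)
    sql_results[name] = {'response': response, 'data': [], 'table': table}
    return ''

store_raw_result('probe', message='SELECT 3', code='SELECT', rows_affected=3)
assert str(sql_results['probe']['response']) == 'SELECT 3'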
@contextproperty
def validation(self):
def validate_any(*args) -> Callable[[T], None]:
@@ -1089,7 +1118,7 @@ class ProviderContext(ManifestContext):
@contextproperty('model')
def ctx_model(self) -> Dict[str, Any]:
return self.model.to_dict()
return self.model.to_dict(omit_none=True)
@contextproperty
def pre_hooks(self) -> Optional[List[Dict[str, Any]]]:
@@ -1179,6 +1208,7 @@ class MacroContext(ProviderContext):
- 'schema' does not use any 'model' information
- they can't be configured with config() directives
"""
def __init__(
self,
model: ParsedMacro,
@@ -1204,7 +1234,7 @@ class ModelContext(ProviderContext):
if isinstance(self.model, ParsedSourceDefinition):
return []
return [
h.to_dict() for h in self.model.config.pre_hook
h.to_dict(omit_none=True) for h in self.model.config.pre_hook
]
@contextproperty
@@ -1212,7 +1242,7 @@ class ModelContext(ProviderContext):
if isinstance(self.model, ParsedSourceDefinition):
return []
return [
h.to_dict() for h in self.model.config.post_hook
h.to_dict(omit_none=True) for h in self.model.config.post_hook
]
@contextproperty
@@ -1269,27 +1299,21 @@ class ModelContext(ProviderContext):
return self.db_wrapper.Relation.create_from(self.config, self.model)
# This is called by '_context_for', used in 'render_with_context'
def generate_parser_model(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
context_config: ContextConfig,
) -> Dict[str, Any]:
# ModelContext.__init__ runs ManifestContext.__init__ via the super()
# chain, which uses a MacroNamespaceBuilder to add every macro in the
# Manifest to the namespace.
ctx = ModelContext(
model, config, manifest, ParseProvider(), context_config
)
return ctx.to_dict()
def generate_parser_macro(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
package_name: Optional[str],
) -> Dict[str, Any]:
ctx = MacroContext(
macro, config, manifest, ParseProvider(), package_name
)
# The 'to_dict' method in ManifestContext moves all of the macro names
# in the macro 'namespace' up to top level keys
return ctx.to_dict()
@@ -1367,3 +1391,68 @@ def generate_parse_exposure(
manifest,
)
}
# This class is currently used by the schema parser in order
# to limit the number of macros in the context by using
# the TestMacroNamespace
class TestContext(ProviderContext):
def __init__(
self,
model,
config: RuntimeConfig,
manifest: Manifest,
provider: Provider,
context_config: Optional[ContextConfig],
macro_resolver: MacroResolver,
) -> None:
# this must be before super init so that macro_resolver exists for
# build_namespace
self.macro_resolver = macro_resolver
self.thread_ctx = MacroStack()
super().__init__(model, config, manifest, provider, context_config)
self._build_test_namespace()
# We need to rebuild this because it's already been built by
# the ProviderContext with the wrong namespace.
self.db_wrapper = self.provider.DatabaseWrapper(
self.adapter, self.namespace
)
def _build_namespace(self):
return {}
# Instead of the complete namespace of all macros that
# ManifestContext._build_namespace would provide (overridden above to
# return an empty dict), this builds a TestMacroNamespace limited to
# the macros in the test node's 'depends_on.macros', plus their own
# macro dependencies
def _build_test_namespace(self):
depends_on_macros = []
if self.model.depends_on and self.model.depends_on.macros:
depends_on_macros = self.model.depends_on.macros
lookup_macros = depends_on_macros.copy()
for macro_unique_id in lookup_macros:
lookup_macro = self.macro_resolver.macros.get(macro_unique_id)
if lookup_macro:
depends_on_macros.extend(lookup_macro.depends_on.macros)
macro_namespace = TestMacroNamespace(
self.macro_resolver, self._ctx, self.model, self.thread_ctx,
depends_on_macros
)
self.namespace = macro_namespace
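A rough sketch of the namespace-limiting idea, using fake macro data: only the test node's depends_on macros, plus those macros' own direct macro dependencies, end up in the namespace.

# Fake macro registry keyed by unique_id; the structure is illustrative.
all_macros = {
    'macro.my_pkg.test_positive': {'depends_on': ['macro.dbt.default__test_positive']},
    'macro.dbt.default__test_positive': {'depends_on': []},
    'macro.my_pkg.unused_helper': {'depends_on': []},
}

def limited_namespace(depends_on_macros):
    wanted = list(depends_on_macros)
    # mirror the one extra level of lookup done above
    for unique_id in list(wanted):
        macro = all_macros.get(unique_id)
        if macro:
            wanted.extend(macro['depends_on'])
    return {uid: all_macros[uid] for uid in wanted if uid in all_macros}

ns = limited_namespace(['macro.my_pkg.test_positive'])
assert 'macro.dbt.default__test_positive' in ns
assert 'macro.my_pkg.unused_helper' not in ns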
def generate_test_context(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
context_config: ContextConfig,
macro_resolver: MacroResolver
) -> Dict[str, Any]:
ctx = TestContext(
model, config, manifest, ParseProvider(), context_config,
macro_resolver
)
# The 'to_dict' method in ManifestContext moves all of the macro names
# in the macro 'namespace' up to top level keys
return ctx.to_dict()

View File

@@ -2,26 +2,37 @@ import abc
import itertools
from dataclasses import dataclass, field
from typing import (
Any, ClassVar, Dict, Tuple, Iterable, Optional, NewType, List, Callable,
Any, ClassVar, Dict, Tuple, Iterable, Optional, List, Callable,
)
from typing_extensions import Protocol
from hologram import JsonSchemaMixin
from hologram.helpers import (
StrEnum, register_pattern, ExtensibleJsonSchemaMixin
)
from dbt.contracts.util import Replaceable
from dbt.exceptions import InternalException
from dbt.utils import translate_aliases
from dbt.logger import GLOBAL_LOGGER as logger
from typing_extensions import Protocol
from dbt.dataclass_schema import (
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
ValidatedStringMixin, register_pattern
)
from dbt.contracts.util import Replaceable
Identifier = NewType('Identifier', str)
class Identifier(ValidatedStringMixin):
ValidationRegex = r'^[A-Za-z_][A-Za-z0-9_]+$'
# we need register_pattern for jsonschema validation
register_pattern(Identifier, r'^[A-Za-z_][A-Za-z0-9_]+$')
@dataclass
class AdapterResponse(dbtClassMixin):
_message: str
code: Optional[str] = None
rows_affected: Optional[int] = None
def __str__(self):
return self._message
class ConnectionState(StrEnum):
INIT = 'init'
OPEN = 'open'
@@ -30,20 +41,19 @@ class ConnectionState(StrEnum):
@dataclass(init=False)
class Connection(ExtensibleJsonSchemaMixin, Replaceable):
class Connection(ExtensibleDbtClassMixin, Replaceable):
type: Identifier
name: Optional[str]
name: Optional[str] = None
state: ConnectionState = ConnectionState.INIT
transaction_open: bool = False
# prevent serialization
_handle: Optional[Any] = None
_credentials: JsonSchemaMixin = field(init=False)
_credentials: Optional[Any] = None
def __init__(
self,
type: Identifier,
name: Optional[str],
credentials: JsonSchemaMixin,
credentials: dbtClassMixin,
state: ConnectionState = ConnectionState.INIT,
transaction_open: bool = False,
handle: Optional[Any] = None,
@@ -85,6 +95,7 @@ class LazyHandle:
"""Opener must be a callable that takes a Connection object and opens the
connection, updating the handle on the Connection.
"""
def __init__(self, opener: Callable[[Connection], Connection]):
self.opener = opener
@@ -102,7 +113,7 @@ class LazyHandle:
# will work.
@dataclass # type: ignore
class Credentials(
ExtensibleJsonSchemaMixin,
ExtensibleDbtClassMixin,
Replaceable,
metaclass=abc.ABCMeta
):
@@ -121,7 +132,7 @@ class Credentials(
) -> Iterable[Tuple[str, Any]]:
"""Return an ordered iterator of key/value pairs for pretty-printing.
"""
as_dict = self.to_dict(omit_none=False, with_aliases=with_aliases)
as_dict = self.to_dict(omit_none=False)
connection_keys = set(self._connection_keys())
aliases: List[str] = []
if with_aliases:
@@ -137,9 +148,10 @@ class Credentials(
raise NotImplementedError
@classmethod
def from_dict(cls, data):
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
data = cls.translate_aliases(data)
return super().from_dict(data)
return data
@classmethod
def translate_aliases(
@@ -147,31 +159,26 @@ class Credentials(
) -> Dict[str, Any]:
return translate_aliases(kwargs, cls._ALIASES, recurse)
def to_dict(self, omit_none=True, validate=False, *, with_aliases=False):
serialized = super().to_dict(omit_none=omit_none, validate=validate)
if with_aliases:
serialized.update({
new_name: serialized[canonical_name]
def __post_serialize__(self, dct):
# no super() -- do we need it?
if self._ALIASES:
dct.update({
new_name: dct[canonical_name]
for new_name, canonical_name in self._ALIASES.items()
if canonical_name in serialized
if canonical_name in dct
})
return serialized
return dct
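An illustrative round-trip of the alias handling above, using a hypothetical _ALIASES mapping; each adapter's credentials class defines its own.

_ALIASES = {'dbname': 'database', 'pass': 'password'}

def translate_aliases(data, aliases):
    # on deserialization: rewrite alias keys to their canonical names
    return {aliases.get(key, key): value for key, value in data.items()}

def add_aliases_back(dct, aliases):
    # on serialization: also expose each canonical value under its alias
    for alias, canonical in aliases.items():
        if canonical in dct:
            dct[alias] = dct[canonical]
    return dct

incoming = {'dbname': 'analytics', 'pass': 's3cret'}
canonical = translate_aliases(incoming, _ALIASES)
assert canonical == {'database': 'analytics', 'password': 's3cret'}
assert add_aliases_back(dict(canonical), _ALIASES)['dbname'] == 'analytics'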
class UserConfigContract(Protocol):
send_anonymous_usage_stats: bool
use_colors: Optional[bool]
partial_parse: Optional[bool]
printer_width: Optional[int]
use_colors: Optional[bool] = None
partial_parse: Optional[bool] = None
printer_width: Optional[int] = None
def set_values(self, cookie_dir: str) -> None:
...
def to_dict(
self, omit_none: bool = True, validate: bool = False
) -> Dict[str, Any]:
...
class HasCredentials(Protocol):
credentials: Credentials
@@ -205,9 +212,10 @@ DEFAULT_QUERY_COMMENT = '''
@dataclass
class QueryComment(JsonSchemaMixin):
class QueryComment(HyphenatedDbtClassMixin):
comment: str = DEFAULT_QUERY_COMMENT
append: bool = False
job_label: bool = False
class AdapterRequiredConfig(HasCredentials, Protocol):

View File

@@ -3,7 +3,7 @@ import os
from dataclasses import dataclass, field
from typing import List, Optional, Union
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt.exceptions import InternalException
@@ -14,8 +14,20 @@ MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
MAXIMUM_SEED_SIZE_NAME = '1MB'
class ParseFileType(StrEnum):
Macro = 'macro'
Model = 'model'
Snapshot = 'snapshot'
Analysis = 'analysis'
Test = 'test'
Seed = 'seed'
Documentation = 'docs'
Schema = 'schema'
Hook = 'hook'
@dataclass
class FilePath(JsonSchemaMixin):
class FilePath(dbtClassMixin):
searched_path: str
relative_path: str
project_root: str
@@ -51,7 +63,7 @@ class FilePath(JsonSchemaMixin):
@dataclass
class FileHash(JsonSchemaMixin):
class FileHash(dbtClassMixin):
name: str # the hash type name
checksum: str # the hashlib.hash_type().hexdigest() of the file contents
@@ -91,7 +103,7 @@ class FileHash(JsonSchemaMixin):
@dataclass
class RemoteFile(JsonSchemaMixin):
class RemoteFile(dbtClassMixin):
@property
def searched_path(self) -> str:
return 'from remote system'
@@ -110,10 +122,14 @@ class RemoteFile(JsonSchemaMixin):
@dataclass
class SourceFile(JsonSchemaMixin):
class SourceFile(dbtClassMixin):
"""Define a source file in dbt"""
path: Union[FilePath, RemoteFile] # the path information
checksum: FileHash
# Seems like knowing which project the file came from would be useful
project_name: Optional[str] = None
# Parse file type: i.e. which parser will process this file
parse_file_type: Optional[ParseFileType] = None
# we don't want to serialize this
_contents: Optional[str] = None
# the unique IDs contained in this file
@@ -156,7 +172,7 @@ class SourceFile(JsonSchemaMixin):
@classmethod
def big_seed(cls, path: FilePath) -> 'SourceFile':
"""Parse seeds over the size limit with just the path"""
self = cls(path=path, checksum=FileHash.path(path.absolute_path))
self = cls(path=path, checksum=FileHash.path(path.original_file_path))
self.contents = ''
return self

View File

@@ -19,19 +19,19 @@ from dbt.contracts.graph.parsed import (
from dbt.node_types import NodeType
from dbt.contracts.util import Replaceable
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin
from dataclasses import dataclass, field
from typing import Optional, List, Union, Dict, Type
@dataclass
class InjectedCTE(JsonSchemaMixin, Replaceable):
class InjectedCTE(dbtClassMixin, Replaceable):
id: str
sql: str
@dataclass
class CompiledNodeMixin(JsonSchemaMixin):
class CompiledNodeMixin(dbtClassMixin):
# this is a special mixin class to provide a required argument. If a node
# is missing a `compiled` flag entirely, it must not be a CompiledNode.
compiled: bool
@@ -178,8 +178,7 @@ def parsed_instance_for(compiled: CompiledNode) -> ParsedResource:
raise ValueError('invalid resource_type: {}'
.format(compiled.resource_type))
# validate=False to allow extra keys from compiling
return cls.from_dict(compiled.to_dict(), validate=False)
return cls.from_dict(compiled.to_dict(omit_none=True))
NonSourceCompiledNode = Union[

View File

@@ -15,19 +15,25 @@ from dbt.contracts.graph.compiled import (
)
from dbt.contracts.graph.parsed import (
ParsedMacro, ParsedDocumentation, ParsedNodePatch, ParsedMacroPatch,
ParsedSourceDefinition, ParsedExposure
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.files import SourceFile
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, FileHash, RemoteFile
from dbt.contracts.util import (
BaseArtifactMetadata, MacroKey, SourceKey, ArtifactMixin, schema_version
)
from dbt.dataclass_schema import dbtClassMixin
from dbt.exceptions import (
InternalException, CompilationException,
raise_duplicate_resource_name, raise_compiler_error, warn_or_error,
raise_invalid_patch,
raise_invalid_patch, raise_duplicate_patch_name,
raise_duplicate_macro_patch_name, raise_duplicate_source_patch_name,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import deprecations
from dbt import flags
from dbt import tracking
@@ -115,7 +121,8 @@ class SourceCache(PackageAwareCache[SourceKey, ParsedSourceDefinition]):
def populate(self):
for source in self._manifest.sources.values():
self.add_source(source)
if hasattr(source, 'source_name'):
self.add_source(source)
def perform_lookup(
self, unique_id: UniqueID
@@ -234,12 +241,13 @@ def build_edges(nodes: List[ManifestNode]):
for node in nodes:
backward_edges[node.unique_id] = node.depends_on_nodes[:]
for unique_id in node.depends_on_nodes:
forward_edges[unique_id].append(node.unique_id)
if unique_id in forward_edges.keys():
forward_edges[unique_id].append(node.unique_id)
return _sort_values(forward_edges), _sort_values(backward_edges)
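A tiny sketch of the guard added above: a dependency only receives a forward edge if it is itself one of the nodes being graphed. The node IDs are made up.

nodes = {
    'model.pkg.a': [],
    'model.pkg.b': ['model.pkg.a', 'source.pkg.raw'],  # source id is not a graphed node
}

forward_edges = {uid: [] for uid in nodes}
backward_edges = {}
for uid, depends_on in nodes.items():
    backward_edges[uid] = list(depends_on)
    for parent in depends_on:
        if parent in forward_edges:   # the guard introduced in this change
            forward_edges[parent].append(uid)

assert forward_edges['model.pkg.a'] == ['model.pkg.b']
assert 'source.pkg.raw' not in forward_edges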
def _deepcopy(value):
return value.from_dict(value.to_dict())
return value.from_dict(value.to_dict(omit_none=True))
class Locality(enum.IntEnum):
@@ -427,158 +435,13 @@ def _update_into(dest: MutableMapping[str, T], new_item: T):
dest[unique_id] = new_item
@dataclass
class Manifest:
"""The manifest for the full graph, after parsing and during compilation.
"""
# These attributes are both positional and by keyword. If an attribute
# is added it must all be added in the __reduce_ex__ method in the
# args tuple in the right position.
nodes: MutableMapping[str, ManifestNode]
sources: MutableMapping[str, ParsedSourceDefinition]
macros: MutableMapping[str, ParsedMacro]
docs: MutableMapping[str, ParsedDocumentation]
exposures: MutableMapping[str, ParsedExposure]
selectors: MutableMapping[str, Any]
disabled: List[CompileResultNode]
files: MutableMapping[str, SourceFile]
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
flat_graph: Dict[str, Any] = field(default_factory=dict)
_docs_cache: Optional[DocCache] = None
_sources_cache: Optional[SourceCache] = None
_refs_cache: Optional[RefableCache] = None
_lock: Lock = field(default_factory=flags.MP_CONTEXT.Lock)
@classmethod
def from_macros(
cls,
macros: Optional[MutableMapping[str, ParsedMacro]] = None,
files: Optional[MutableMapping[str, SourceFile]] = None,
) -> 'Manifest':
if macros is None:
macros = {}
if files is None:
files = {}
return cls(
nodes={},
sources={},
macros=macros,
docs={},
exposures={},
selectors={},
disabled=[],
files=files,
)
def sync_update_node(
self, new_node: NonSourceCompiledNode
) -> NonSourceCompiledNode:
"""update the node with a lock. The only time we should want to lock is
when compiling an ephemeral ancestor of a node at runtime, because
multiple threads could be just-in-time compiling the same ephemeral
dependency, and we want them to have a consistent view of the manifest.
If the existing node is not compiled, update it with the new node and
return that. If the existing node is compiled, do not update the
manifest and return the existing node.
"""
with self._lock:
existing = self.nodes[new_node.unique_id]
if getattr(existing, 'compiled', False):
# already compiled -> must be a NonSourceCompiledNode
return cast(NonSourceCompiledNode, existing)
_update_into(self.nodes, new_node)
return new_node
def update_exposure(self, new_exposure: ParsedExposure):
_update_into(self.exposures, new_exposure)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
def update_source(self, new_source: ParsedSourceDefinition):
_update_into(self.sources, new_source)
def build_flat_graph(self):
"""This attribute is used in context.common by each node, so we want to
only build it once and avoid any concurrency issues around it.
Make sure you don't call this until you're done with building your
manifest!
"""
self.flat_graph = {
'nodes': {
k: v.to_dict(omit_none=False) for k, v in self.nodes.items()
},
'sources': {
k: v.to_dict(omit_none=False) for k, v in self.sources.items()
}
}
def find_disabled_by_name(
self, name: str, package: Optional[str] = None
) -> Optional[ManifestNode]:
searcher: NameSearcher = NameSearcher(
name, package, NodeType.refable()
)
result = searcher.search(self.disabled)
return result
def find_disabled_source_by_name(
self, source_name: str, table_name: str, package: Optional[str] = None
) -> Optional[ParsedSourceDefinition]:
search_name = f'{source_name}.{table_name}'
searcher: NameSearcher = NameSearcher(
search_name, package, [NodeType.Source]
)
result = searcher.search(self.disabled)
if result is not None:
assert isinstance(result, ParsedSourceDefinition)
return result
def _find_macros_by_name(
self,
name: str,
root_project_name: str,
filter: Optional[Callable[[MacroCandidate], bool]] = None
) -> CandidateList:
"""Find macros by their name.
"""
# avoid an import cycle
from dbt.adapters.factory import get_adapter_package_names
candidates: CandidateList = CandidateList()
packages = set(get_adapter_package_names(self.metadata.adapter_type))
for unique_id, macro in self.macros.items():
if macro.name != name:
continue
candidate = MacroCandidate(
locality=_get_locality(macro, root_project_name, packages),
macro=macro,
)
if filter is None or filter(candidate):
candidates.append(candidate)
return candidates
def _materialization_candidates_for(
self, project_name: str,
materialization_name: str,
adapter_type: Optional[str],
) -> CandidateList:
if adapter_type is None:
specificity = Specificity.Default
else:
specificity = Specificity.Adapter
full_name = dbt.utils.get_materialization_macro_name(
materialization_name=materialization_name,
adapter_type=adapter_type,
with_prefix=False,
)
return CandidateList(
MaterializationCandidate.from_macro(m, specificity)
for m in self._find_macros_by_name(full_name, project_name)
)
# This contains macro methods that are in both the Manifest
# and the MacroManifest
class MacroMethods:
# Just to make mypy happy. There must be a better way.
def __init__(self):
self.macros = []
self.metadata = {}
def find_macro_by_name(
self, name: str, root_project_name: str, package: Optional[str]
@@ -624,6 +487,159 @@ class Manifest:
)
return candidates.last()
def _find_macros_by_name(
self,
name: str,
root_project_name: str,
filter: Optional[Callable[[MacroCandidate], bool]] = None
) -> CandidateList:
"""Find macros by their name.
"""
# avoid an import cycle
from dbt.adapters.factory import get_adapter_package_names
candidates: CandidateList = CandidateList()
packages = set(get_adapter_package_names(self.metadata.adapter_type))
for unique_id, macro in self.macros.items():
if macro.name != name:
continue
candidate = MacroCandidate(
locality=_get_locality(macro, root_project_name, packages),
macro=macro,
)
if filter is None or filter(candidate):
candidates.append(candidate)
return candidates
@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash
profile_hash: FileHash
project_hashes: MutableMapping[str, FileHash]
@dataclass
class Manifest(MacroMethods):
"""The manifest for the full graph, after parsing and during compilation.
"""
# These attributes are both positional and by keyword. If an attribute
# is added it must all be added in the __reduce_ex__ method in the
# args tuple in the right position.
nodes: MutableMapping[str, ManifestNode]
sources: MutableMapping[str, ParsedSourceDefinition]
macros: MutableMapping[str, ParsedMacro]
docs: MutableMapping[str, ParsedDocumentation]
exposures: MutableMapping[str, ParsedExposure]
selectors: MutableMapping[str, Any]
disabled: List[CompileResultNode]
files: MutableMapping[str, SourceFile]
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
flat_graph: Dict[str, Any] = field(default_factory=dict)
state_check: Optional[ManifestStateCheck] = None
# Moved from the ParseResult object
macro_patches: MutableMapping[MacroKey, ParsedMacroPatch] = field(default_factory=dict)
patches: MutableMapping[str, ParsedNodePatch] = field(default_factory=dict)
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
# following is from ParseResult
_disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
_docs_cache: Optional[DocCache] = None
_sources_cache: Optional[SourceCache] = None
_refs_cache: Optional[RefableCache] = None
_lock: Lock = field(default_factory=flags.MP_CONTEXT.Lock)
def sync_update_node(
self, new_node: NonSourceCompiledNode
) -> NonSourceCompiledNode:
"""update the node with a lock. The only time we should want to lock is
when compiling an ephemeral ancestor of a node at runtime, because
multiple threads could be just-in-time compiling the same ephemeral
dependency, and we want them to have a consistent view of the manifest.
If the existing node is not compiled, update it with the new node and
return that. If the existing node is compiled, do not update the
manifest and return the existing node.
"""
with self._lock:
existing = self.nodes[new_node.unique_id]
if getattr(existing, 'compiled', False):
# already compiled -> must be a NonSourceCompiledNode
return cast(NonSourceCompiledNode, existing)
_update_into(self.nodes, new_node)
return new_node
def update_exposure(self, new_exposure: ParsedExposure):
_update_into(self.exposures, new_exposure)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
def update_source(self, new_source: ParsedSourceDefinition):
_update_into(self.sources, new_source)
def build_flat_graph(self):
"""This attribute is used in context.common by each node, so we want to
only build it once and avoid any concurrency issues around it.
Make sure you don't call this until you're done with building your
manifest!
"""
self.flat_graph = {
'exposures': {
k: v.to_dict(omit_none=False)
for k, v in self.exposures.items()
},
'nodes': {
k: v.to_dict(omit_none=False)
for k, v in self.nodes.items()
},
'sources': {
k: v.to_dict(omit_none=False)
for k, v in self.sources.items()
}
}
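For reference, the flat_graph built above has roughly this shape (abbreviated, illustrative values); it is what the 'graph' context variable exposes to Jinja.

flat_graph = {
    'exposures': {
        'exposure.my_pkg.weekly_kpis': {'name': 'weekly_kpis', 'type': 'dashboard'},
    },
    'nodes': {
        'model.my_pkg.orders': {'name': 'orders', 'resource_type': 'model'},
    },
    'sources': {
        'source.my_pkg.raw.orders': {'name': 'orders', 'source_name': 'raw'},
    },
}

# e.g. in Jinja: graph['nodes'].values() iterates over all parsed nodes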
def find_disabled_by_name(
self, name: str, package: Optional[str] = None
) -> Optional[ManifestNode]:
searcher: NameSearcher = NameSearcher(
name, package, NodeType.refable()
)
result = searcher.search(self.disabled)
return result
def find_disabled_source_by_name(
self, source_name: str, table_name: str, package: Optional[str] = None
) -> Optional[ParsedSourceDefinition]:
search_name = f'{source_name}.{table_name}'
searcher: NameSearcher = NameSearcher(
search_name, package, [NodeType.Source]
)
result = searcher.search(self.disabled)
if result is not None:
assert isinstance(result, ParsedSourceDefinition)
return result
def _materialization_candidates_for(
self, project_name: str,
materialization_name: str,
adapter_type: Optional[str],
) -> CandidateList:
if adapter_type is None:
specificity = Specificity.Default
else:
specificity = Specificity.Adapter
full_name = dbt.utils.get_materialization_macro_name(
materialization_name=materialization_name,
adapter_type=adapter_type,
with_prefix=False,
)
return CandidateList(
MaterializationCandidate.from_macro(m, specificity)
for m in self._find_macros_by_name(full_name, project_name)
)
def find_materialization_macro_by_name(
self, project_name: str, materialization_name: str, adapter_type: str
) -> Optional[ParsedMacro]:
@@ -638,7 +654,7 @@ class Manifest:
def get_resource_fqns(self) -> Mapping[str, PathSet]:
resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
all_resources = chain(self.nodes.values(), self.sources.values())
all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
if resource_type_plural not in resource_fqns:
@@ -657,26 +673,51 @@ class Manifest:
if node.resource_type in NodeType.refable():
self._refs_cache.add_node(node)
def patch_macros(
self, patches: MutableMapping[MacroKey, ParsedMacroPatch]
def add_patch(
self, source_file: SourceFile, patch: ParsedNodePatch,
) -> None:
# patches can't be overwritten
if patch.name in self.patches:
raise_duplicate_patch_name(patch, self.patches[patch.name])
self.patches[patch.name] = patch
self.get_file(source_file).patches.append(patch.name)
def add_macro_patch(
self, source_file: SourceFile, patch: ParsedMacroPatch,
) -> None:
# macros are fully namespaced
key = (patch.package_name, patch.name)
if key in self.macro_patches:
raise_duplicate_macro_patch_name(patch, self.macro_patches[key])
self.macro_patches[key] = patch
self.get_file(source_file).macro_patches.append(key)
def add_source_patch(
self, source_file: SourceFile, patch: SourcePatch,
) -> None:
# source patches must be unique
key = (patch.overrides, patch.name)
if key in self.source_patches:
raise_duplicate_source_patch_name(patch, self.source_patches[key])
self.source_patches[key] = patch
self.get_file(source_file).source_patches.append(key)
def patch_macros(self) -> None:
for macro in self.macros.values():
key = (macro.package_name, macro.name)
patch = patches.pop(key, None)
patch = self.macro_patches.pop(key, None)
if not patch:
continue
macro.patch(patch)
if patches:
for patch in patches.values():
if self.macro_patches:
for patch in self.macro_patches.values():
warn_or_error(
f'WARNING: Found documentation for macro "{patch.name}" '
f'which was not found'
)
def patch_nodes(
self, patches: MutableMapping[str, ParsedNodePatch]
) -> None:
def patch_nodes(self) -> None:
"""Patch nodes with the given dict of patches. Note that this consumes
the input!
This relies on the fact that all nodes have unique _name_ fields, not
@@ -686,8 +727,10 @@ class Manifest:
# only have the node name in the patch, we have to iterate over all the
# nodes looking for matching names. We could use a NameSearcher if we
# were ok with doing an O(n*m) search (one nodes scan per patch)
# Q: could we save patches by node unique_ids instead, or convert
# between names and node ids?
for node in self.nodes.values():
patch = patches.pop(node.name, None)
patch = self.patches.pop(node.name, None)
if not patch:
continue
@@ -705,9 +748,10 @@ class Manifest:
node.patch(patch)
# log debug-level warning about nodes we couldn't find
if patches:
for patch in patches.values():
# If anything is left in self.patches, it means that the node for
# that patch wasn't found.
if self.patches:
for patch in self.patches.values():
# since patches aren't nodes, we can't use the existing
# target_not_found warning
logger.debug((
@@ -728,6 +772,7 @@ class Manifest:
chain(self.nodes.values(), self.sources.values())
)
# This is used in dbt.task.rpc.sql_commands 'add_new_refs'
def deepcopy(self):
return Manifest(
nodes={k: _deepcopy(v) for k, v in self.nodes.items()},
@@ -735,10 +780,11 @@ class Manifest:
macros={k: _deepcopy(v) for k, v in self.macros.items()},
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
selectors=self.root_project.manifest_selectors,
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
disabled=[_deepcopy(n) for n in self.disabled],
files={k: _deepcopy(v) for k, v in self.files.items()},
state_check=_deepcopy(self.state_check),
)
def writable_manifest(self):
@@ -762,10 +808,10 @@ class Manifest:
parent_map=backward_edges,
)
def to_dict(self, omit_none=True, validate=False):
return self.writable_manifest().to_dict(
omit_none=omit_none, validate=validate
)
# When 'to_dict' is called on the Manifest, it substitutes a
# WritableManifest
def __pre_serialize__(self):
return self.writable_manifest()
def write(self, path):
self.writable_manifest().write(path)
@@ -887,6 +933,7 @@ class Manifest:
def merge_from_artifact(
self,
adapter,
other: 'WritableManifest',
selected: AbstractSet[UniqueID],
) -> None:
@@ -898,10 +945,14 @@ class Manifest:
refables = set(NodeType.refable())
merged = set()
for unique_id, node in other.nodes.items():
if (
current = self.nodes.get(unique_id)
if current and (
node.resource_type in refables and
not node.is_ephemeral and
unique_id not in selected
unique_id not in selected and
not adapter.get_relation(
current.database, current.schema, current.identifier
)
):
merged.add(unique_id)
self.nodes[unique_id] = node.replace(deferred=True)
@@ -912,6 +963,212 @@ class Manifest:
f'Merged {len(merged)} items from state (sample: {sample})'
)
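A paraphrase of the deferral condition above as a standalone predicate; 'refable' is approximated here as model/seed/snapshot, and relation_exists stands in for the adapter.get_relation check.

def should_defer(node, unique_id, current, selected, relation_exists):
    return (
        current is not None and                                        # node exists locally
        node['resource_type'] in {'model', 'seed', 'snapshot'} and     # refable
        not node['is_ephemeral'] and
        unique_id not in selected and                                  # not selected this run
        not relation_exists                                            # nothing already built locally
    )

node = {'resource_type': 'model', 'is_ephemeral': False}
assert should_defer(node, 'model.pkg.a', current=object(),
                    selected=set(), relation_exists=False)
assert not should_defer(node, 'model.pkg.a', current=None,
                        selected=set(), relation_exists=False)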
# Methods that were formerly in ParseResult
def get_file(self, source_file: SourceFile) -> SourceFile:
key = source_file.search_key
if key is None:
return source_file
if key not in self.files:
self.files[key] = source_file
return self.files[key]
def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
if macro.unique_id in self.macros:
# detect that the macro exists and emit an error
other_path = self.macros[macro.unique_id].original_file_path
# subtract 2 for the "Compilation Error" indent
# note that the line wrap eats newlines, so if you want newlines,
# this is the result :(
msg = line_wrap_message(
f'''\
dbt found two macros named "{macro.name}" in the project
"{macro.package_name}".
To fix this error, rename or remove one of the following
macros:
- {macro.original_file_path}
- {other_path}
''',
subtract=2
)
raise_compiler_error(msg)
self.macros[macro.unique_id] = macro
self.get_file(source_file).macros.append(macro.unique_id)
def has_file(self, source_file: SourceFile) -> bool:
key = source_file.search_key
if key is None:
return False
if key not in self.files:
return False
my_checksum = self.files[key].checksum
return my_checksum == source_file.checksum
def add_source(
self, source_file: SourceFile, source: UnpatchedSourceDefinition
):
# sources can't be overwritten!
_check_duplicates(source, self.sources)
self.sources[source.unique_id] = source # type: ignore
self.get_file(source_file).sources.append(source.unique_id)
def add_node_nofile(self, node: ManifestNodes):
# nodes can't be overwritten!
_check_duplicates(node, self.nodes)
self.nodes[node.unique_id] = node
def add_node(self, source_file: SourceFile, node: ManifestNodes):
self.add_node_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
def add_exposure(self, source_file: SourceFile, exposure: ParsedExposure):
_check_duplicates(exposure, self.exposures)
self.exposures[exposure.unique_id] = exposure
self.get_file(source_file).exposures.append(exposure.unique_id)
def add_disabled_nofile(self, node: CompileResultNode):
if node.unique_id in self._disabled:
self._disabled[node.unique_id].append(node)
else:
self._disabled[node.unique_id] = [node]
def add_disabled(self, source_file: SourceFile, node: CompileResultNode):
self.add_disabled_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
_check_duplicates(doc, self.docs)
self.docs[doc.unique_id] = doc
self.get_file(source_file).docs.append(doc.unique_id)
def _get_disabled(
self,
unique_id: str,
match_file: SourceFile,
) -> List[CompileResultNode]:
if unique_id not in self._disabled:
raise InternalException(
'called _get_disabled with id={}, but it does not exist'
.format(unique_id)
)
return [
n for n in self._disabled[unique_id]
if n.original_file_path == match_file.path.original_file_path
]
# This is only used by 'sanitized_update' which processes "old_manifest"
def _process_node(
self,
node_id: str,
source_file: SourceFile,
old_file: SourceFile,
old_manifest: Any,
) -> None:
"""Nodes are a special kind of complicated - there can be multiple
with the same name, as long as all but one are disabled.
Only handle nodes where the matching node has the same resource type
as the current parser.
"""
source_path = source_file.path.original_file_path
found: bool = False
if node_id in old_manifest.nodes:
old_node = old_manifest.nodes[node_id]
if old_node.original_file_path == source_path:
self.add_node(source_file, old_node)
found = True
if node_id in old_manifest._disabled:
matches = old_manifest._get_disabled(node_id, source_file)
for match in matches:
self.add_disabled(source_file, match)
found = True
if not found:
raise CompilationException(
'Expected to find "{}" in cached "manifest.nodes" or '
'"manifest.disabled" based on cached file information: {}!'
.format(node_id, old_file)
)
# This is called by ManifestLoader._get_cached/parse_with_cache,
# which handles updating the ManifestLoader results with information
# from the "old_manifest", i.e. the pickle file if the checksums are
# the same.
def sanitized_update(
self,
source_file: SourceFile,
old_manifest: Any,
resource_type: NodeType,
) -> bool:
if isinstance(source_file.path, RemoteFile):
return False
old_file = old_manifest.get_file(source_file)
for doc_id in old_file.docs:
doc = _expect_value(doc_id, old_manifest.docs, old_file, "docs")
self.add_doc(source_file, doc)
for macro_id in old_file.macros:
macro = _expect_value(
macro_id, old_manifest.macros, old_file, "macros"
)
self.add_macro(source_file, macro)
for source_id in old_file.sources:
source = _expect_value(
source_id, old_manifest.sources, old_file, "sources"
)
self.add_source(source_file, source)
# because we know this is how we _parsed_ the node, we can safely
# assume if it's disabled it was done by the project or file, and
# we can keep our old data
# the node ID could be in old_manifest.disabled AND in old_manifest.nodes.
# In that case, we have to make sure the path also matches.
for node_id in old_file.nodes:
# cheat: look at the first part of the node ID and compare it to
# the parser resource type. On a mismatch, bail out.
if resource_type != node_id.split('.')[0]:
continue
self._process_node(node_id, source_file, old_file, old_manifest)
for exposure_id in old_file.exposures:
exposure = _expect_value(
exposure_id, old_manifest.exposures, old_file, "exposures"
)
self.add_exposure(source_file, exposure)
# Note: There shouldn't be any patches in here after the cleanup.
# The pickled Manifest should have had all patches applied.
patched = False
for name in old_file.patches:
patch = _expect_value(
name, old_manifest.patches, old_file, "patches"
)
self.add_patch(source_file, patch)
patched = True
if patched:
self.get_file(source_file).patches.sort()
macro_patched = False
for key in old_file.macro_patches:
macro_patch = _expect_value(
key, old_manifest.macro_patches, old_file, "macro_patches"
)
self.add_macro_patch(source_file, macro_patch)
macro_patched = True
if macro_patched:
self.get_file(source_file).macro_patches.sort()
return True
# end of methods formerly in ParseResult
# Provide support for copy.deepcopy() - we just need to avoid the lock!
# pickle and deepcopy use this. It returns a callable object used to
# create the initial version of the object and a tuple of arguments
@@ -931,6 +1188,11 @@ class Manifest:
self.files,
self.metadata,
self.flat_graph,
self.state_check,
self.macro_patches,
self.patches,
self.source_patches,
self._disabled,
self._docs_cache,
self._sources_cache,
self._refs_cache,
@@ -938,6 +1200,18 @@ class Manifest:
return self.__class__, args
class MacroManifest(MacroMethods):
def __init__(self, macros):
self.macros = macros
self.metadata = ManifestMetadata()
# This is returned by the 'graph' context property
# in the ProviderContext class.
self.flat_graph = {}
AnyManifest = Union[Manifest, MacroManifest]
@dataclass
@schema_version('manifest', 1)
class WritableManifest(ArtifactMixin):
@@ -983,3 +1257,22 @@ class WritableManifest(ArtifactMixin):
metadata: ManifestMetadata = field(metadata=dict(
description='Metadata about the manifest',
))
def _check_duplicates(
value: HasUniqueID, src: Mapping[str, HasUniqueID]
):
if value.unique_id in src:
raise_duplicate_resource_name(value, src[value.unique_id])
def _expect_value(
key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
) -> V_T:
if key not in src:
raise CompilationException(
'Expected to find "{}" in cached "result.{}" based '
'on cached file information: {}!'
.format(key, name, old_file)
)
return src[key]

View File

@@ -2,19 +2,12 @@ from dataclasses import field, Field, dataclass
from enum import Enum
from itertools import chain
from typing import (
Any, List, Optional, Dict, MutableMapping, Union, Type, NewType, Tuple,
TypeVar, Callable
Any, List, Optional, Dict, MutableMapping, Union, Type,
TypeVar, Callable,
)
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError, register_pattern,
)
# TODO: patch+upgrade hologram to avoid this jsonschema import
import jsonschema # type: ignore
# This is protected, but we really do want to reuse this logic, and the cache!
# It would be nice to move the custom error picking stuff into hologram!
from hologram import _validate_schema
from hologram import JsonSchemaMixin, ValidationError
from hologram.helpers import StrEnum, register_pattern
from dbt.contracts.graph.unparsed import AdditionalPropertiesAllowed
from dbt.exceptions import CompilationException, InternalException
from dbt.contracts.util import Replaceable, list_str
@@ -170,22 +163,15 @@ def insensitive_patterns(*patterns: str):
return '^({})$'.format('|'.join(lowercased))
Severity = NewType('Severity', str)
class Severity(str):
pass
register_pattern(Severity, insensitive_patterns('warn', 'error'))
class SnapshotStrategy(StrEnum):
Timestamp = 'timestamp'
Check = 'check'
class All(StrEnum):
All = 'all'
@dataclass
class Hook(JsonSchemaMixin, Replaceable):
class Hook(dbtClassMixin, Replaceable):
sql: str
transaction: bool = True
index: Optional[int] = None
@@ -313,29 +299,6 @@ class BaseConfig(
)
return result
def to_dict(
self,
omit_none: bool = True,
validate: bool = False,
*,
omit_hidden: bool = True,
) -> Dict[str, Any]:
result = super().to_dict(omit_none=omit_none, validate=validate)
if omit_hidden and not omit_none:
for fld, target_field in self._get_fields():
if target_field not in result:
continue
# if the field is not None, preserve it regardless of the
# setting. This is in line with existing behavior, but isn't
# an endorsement of it!
if result[target_field] is not None:
continue
if not ShowBehavior.should_show(fld):
del result[target_field]
return result
def update_from(
self: T, data: Dict[str, Any], adapter_type: str, validate: bool = True
) -> T:
@@ -344,7 +307,7 @@ class BaseConfig(
"""
# sadly, this is a circular import
from dbt.adapters.factory import get_config_class_by_name
dct = self.to_dict(omit_none=False, validate=False, omit_hidden=False)
dct = self.to_dict(omit_none=False)
adapter_config_cls = get_config_class_by_name(adapter_type)
@@ -358,21 +321,23 @@ class BaseConfig(
dct.update(data)
# any validation failures must have come from the update
return self.from_dict(dct, validate=validate)
if validate:
self.validate(dct)
return self.from_dict(dct)
def finalize_and_validate(self: T) -> T:
# from_dict will validate for us
dct = self.to_dict(omit_none=False, validate=False)
dct = self.to_dict(omit_none=False)
self.validate(dct)
return self.from_dict(dct)
def replace(self, **kwargs):
dct = self.to_dict(validate=False)
dct = self.to_dict(omit_none=True)
mapping = self.field_mapping()
for key, value in kwargs.items():
new_key = mapping.get(key, key)
dct[new_key] = value
return self.from_dict(dct, validate=False)
return self.from_dict(dct)
@dataclass
@@ -431,12 +396,33 @@ class NodeConfig(BaseConfig):
full_refresh: Optional[bool] = None
@classmethod
def from_dict(cls, data, validate=True):
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
field_map = {'post-hook': 'post_hook', 'pre-hook': 'pre_hook'}
# create a new dict because otherwise it gets overwritten in
# tests
new_dict = {}
for key in data:
new_dict[key] = data[key]
data = new_dict
for key in hooks.ModelHookType:
if key in data:
data[key] = [hooks.get_hook_dict(h) for h in data[key]]
return super().from_dict(data, validate=validate)
for field_name in field_map:
if field_name in data:
new_name = field_map[field_name]
data[new_name] = data.pop(field_name)
return data
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
field_map = {'post_hook': 'post-hook', 'pre_hook': 'pre-hook'}
for field_name in field_map:
if field_name in dct:
dct[field_map[field_name]] = dct.pop(field_name)
return dct
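A small sketch of the key renaming performed by the two hooks above: dict/YAML configs spell the keys 'pre-hook'/'post-hook', while the dataclass fields are pre_hook/post_hook.

DESERIALIZE_MAP = {'post-hook': 'post_hook', 'pre-hook': 'pre_hook'}
SERIALIZE_MAP = {v: k for k, v in DESERIALIZE_MAP.items()}

def rename_keys(data, mapping):
    return {mapping.get(key, key): value for key, value in data.items()}

raw = {'materialized': 'table', 'pre-hook': ["grant select on {{ this }} to reporter"]}
internal = rename_keys(raw, DESERIALIZE_MAP)
assert 'pre_hook' in internal and 'pre-hook' not in internal
assert rename_keys(internal, SERIALIZE_MAP)['pre-hook'] == raw['pre-hook']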
# this is still used by jsonschema validation
@classmethod
def field_mapping(cls):
return {'post_hook': 'post-hook', 'pre_hook': 'pre-hook'}
@@ -450,184 +436,53 @@ class SeedConfig(NodeConfig):
@dataclass
class TestConfig(NodeConfig):
materialized: str = 'test'
severity: Severity = Severity('ERROR')
SnapshotVariants = Union[
'TimestampSnapshotConfig',
'CheckSnapshotConfig',
'GenericSnapshotConfig',
]
def _relevance_without_strategy(error: jsonschema.ValidationError):
# calculate the 'relevance' of an error the normal jsonschema way, except
# if the validator is in the 'strategy' field and its conflicting with the
# 'enum'. This suppresses `"'timestamp' is not one of ['check']` and such
if 'strategy' in error.path and error.validator in {'enum', 'not'}:
length = 1
else:
length = -len(error.path)
validator = error.validator
return length, validator not in {'anyOf', 'oneOf'}
@dataclass
class SnapshotWrapper(JsonSchemaMixin):
"""This is a little wrapper to let us serialize/deserialize the
SnapshotVariants union.
"""
config: SnapshotVariants # mypy: ignore
@classmethod
def validate(cls, data: Any):
config = data.get('config', {})
if config.get('strategy') == 'check':
schema = _validate_schema(CheckSnapshotConfig)
to_validate = config
elif config.get('strategy') == 'timestamp':
schema = _validate_schema(TimestampSnapshotConfig)
to_validate = config
else:
schema = _validate_schema(cls)
to_validate = data
validator = jsonschema.Draft7Validator(schema)
error = jsonschema.exceptions.best_match(
validator.iter_errors(to_validate),
key=_relevance_without_strategy,
)
if error is not None:
raise ValidationError.create_from(error) from error
@dataclass
class EmptySnapshotConfig(NodeConfig):
materialized: str = 'snapshot'
@dataclass(init=False)
@dataclass
class SnapshotConfig(EmptySnapshotConfig):
unique_key: str = field(init=False, metadata=dict(init_required=True))
target_schema: str = field(init=False, metadata=dict(init_required=True))
strategy: Optional[str] = None
unique_key: Optional[str] = None
target_schema: Optional[str] = None
target_database: Optional[str] = None
def __init__(
self,
unique_key: str,
target_schema: str,
target_database: Optional[str] = None,
**kwargs
) -> None:
self.unique_key = unique_key
self.target_schema = target_schema
self.target_database = target_database
# kwargs['materialized'] = materialized
super().__init__(**kwargs)
# type hacks...
@classmethod
def _get_fields(cls) -> List[Tuple[Field, str]]: # type: ignore
fields: List[Tuple[Field, str]] = []
for old_field, name in super()._get_fields():
new_field = old_field
# tell hologram we're really an initvar
if old_field.metadata and old_field.metadata.get('init_required'):
new_field = field(init=True, metadata=old_field.metadata)
new_field.name = old_field.name
new_field.type = old_field.type
new_field._field_type = old_field._field_type # type: ignore
fields.append((new_field, name))
return fields
def finalize_and_validate(self: 'SnapshotConfig') -> SnapshotVariants:
data = self.to_dict()
return SnapshotWrapper.from_dict({'config': data}).config
@dataclass(init=False)
class GenericSnapshotConfig(SnapshotConfig):
strategy: str = field(init=False, metadata=dict(init_required=True))
def __init__(self, strategy: str, **kwargs) -> None:
self.strategy = strategy
super().__init__(**kwargs)
updated_at: Optional[str] = None
check_cols: Optional[Union[str, List[str]]] = None
@classmethod
def _collect_json_schema(
cls, definitions: Dict[str, Any]
) -> Dict[str, Any]:
# this is the method you want to override in hologram if you want
# to do clever things about the json schema and have classes that
# contain instances of your JsonSchemaMixin respect the change.
schema = super()._collect_json_schema(definitions)
def validate(cls, data):
super().validate(data)
if data.get('strategy') == 'check':
if not data.get('check_cols'):
raise ValidationError(
"A snapshot configured with the check strategy must "
"specify a check_cols configuration.")
if (isinstance(data['check_cols'], str) and
data['check_cols'] != 'all'):
raise ValidationError(
f"Invalid value for 'check_cols': {data['check_cols']}. "
"Expected 'all' or a list of strings.")
# Instead of just the strategy we'd calculate normally, say
# "this strategy except none of our specialization strategies".
strategies = [schema['properties']['strategy']]
for specialization in (TimestampSnapshotConfig, CheckSnapshotConfig):
strategies.append(
{'not': specialization.json_schema()['properties']['strategy']}
)
elif data.get('strategy') == 'timestamp':
if not data.get('updated_at'):
raise ValidationError(
"A snapshot configured with the timestamp strategy "
"must specify an updated_at configuration.")
if data.get('check_cols'):
raise ValidationError(
"A 'timestamp' snapshot should not have 'check_cols'")
# If the strategy is not 'check' or 'timestamp' it's a custom strategy,
# formerly supported with GenericSnapshotConfig
schema['properties']['strategy'] = {
'allOf': strategies
}
return schema
@dataclass(init=False)
class TimestampSnapshotConfig(SnapshotConfig):
strategy: str = field(
init=False,
metadata=dict(
restrict=[str(SnapshotStrategy.Timestamp)],
init_required=True,
),
)
updated_at: str = field(init=False, metadata=dict(init_required=True))
def __init__(
self, strategy: str, updated_at: str, **kwargs
) -> None:
self.strategy = strategy
self.updated_at = updated_at
super().__init__(**kwargs)
@dataclass(init=False)
class CheckSnapshotConfig(SnapshotConfig):
strategy: str = field(
init=False,
metadata=dict(
restrict=[str(SnapshotStrategy.Check)],
init_required=True,
),
)
# TODO: is there a way to get this to accept tuples of strings? Adding
# `Tuple[str, ...]` to the list of types results in this:
# ['email'] is valid under each of {'type': 'array', 'items':
# {'type': 'string'}}, {'type': 'array', 'items': {'type': 'string'}}
# but without it, parsing gets upset about values like `('email',)`
# maybe hologram itself should support this behavior? It's not like tuples
# are meaningful in json
check_cols: Union[All, List[str]] = field(
init=False,
metadata=dict(init_required=True),
)
def __init__(
self, strategy: str, check_cols: Union[All, List[str]],
**kwargs
) -> None:
self.strategy = strategy
self.check_cols = check_cols
super().__init__(**kwargs)
def finalize_and_validate(self):
data = self.to_dict(omit_none=True)
self.validate(data)
return self.from_dict(data)
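A standalone sketch of the strategy checks encoded in validate() above; the real validation runs through dbt's dataclass schema machinery.

def validate_snapshot_config(data):
    strategy = data.get('strategy')
    if strategy == 'check':
        check_cols = data.get('check_cols')
        if not check_cols:
            raise ValueError("check strategy requires check_cols")
        if isinstance(check_cols, str) and check_cols != 'all':
            raise ValueError("check_cols must be 'all' or a list of strings")
    elif strategy == 'timestamp':
        if not data.get('updated_at'):
            raise ValueError("timestamp strategy requires updated_at")
        if data.get('check_cols'):
            raise ValueError("a timestamp snapshot should not set check_cols")
    # any other strategy is treated as a custom strategy and passes through

validate_snapshot_config({'strategy': 'timestamp', 'updated_at': 'updated_at'})
validate_snapshot_config({'strategy': 'check', 'check_cols': ['status']})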
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {

View File

@@ -13,8 +13,9 @@ from typing import (
TypeVar,
)
from hologram import JsonSchemaMixin
from hologram.helpers import ExtensibleJsonSchemaMixin
from dbt.dataclass_schema import (
dbtClassMixin, ExtensibleDbtClassMixin
)
from dbt.clients.system import write_file
from dbt.contracts.files import FileHash, MAXIMUM_SEED_SIZE_NAME
@@ -38,20 +39,14 @@ from .model_config import (
TestConfig,
SourceConfig,
EmptySnapshotConfig,
SnapshotVariants,
)
# import these 3 so the SnapshotVariants forward ref works.
from .model_config import ( # noqa
TimestampSnapshotConfig,
CheckSnapshotConfig,
GenericSnapshotConfig,
SnapshotConfig,
)
@dataclass
class ColumnInfo(
AdditionalPropertiesMixin,
ExtensibleJsonSchemaMixin,
ExtensibleDbtClassMixin,
Replaceable
):
name: str
@@ -64,7 +59,7 @@ class ColumnInfo(
@dataclass
class HasFqn(JsonSchemaMixin, Replaceable):
class HasFqn(dbtClassMixin, Replaceable):
fqn: List[str]
def same_fqn(self, other: 'HasFqn') -> bool:
@@ -72,12 +67,12 @@ class HasFqn(JsonSchemaMixin, Replaceable):
@dataclass
class HasUniqueID(JsonSchemaMixin, Replaceable):
class HasUniqueID(dbtClassMixin, Replaceable):
unique_id: str
@dataclass
class MacroDependsOn(JsonSchemaMixin, Replaceable):
class MacroDependsOn(dbtClassMixin, Replaceable):
macros: List[str] = field(default_factory=list)
# 'in' on lists is O(n) so this is O(n^2) for # of macros
@@ -96,12 +91,22 @@ class DependsOn(MacroDependsOn):
@dataclass
class HasRelationMetadata(JsonSchemaMixin, Replaceable):
class HasRelationMetadata(dbtClassMixin, Replaceable):
database: Optional[str]
schema: str
# Can't set database to None like it ought to be
# because it messes up the subclasses and default parameters
# so hack it here
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
if 'database' not in data:
data['database'] = None
return data
class ParsedNodeMixins(JsonSchemaMixin):
class ParsedNodeMixins(dbtClassMixin):
resource_type: NodeType
depends_on: DependsOn
config: NodeConfig
@@ -132,8 +137,12 @@ class ParsedNodeMixins(JsonSchemaMixin):
self.meta = patch.meta
self.docs = patch.docs
if flags.STRICT_MODE:
assert isinstance(self, JsonSchemaMixin)
self.to_dict(validate=True, omit_none=False)
# It seems odd that an instance can be invalid
# Maybe there should be validation or restrictions
# elsewhere?
assert isinstance(self, dbtClassMixin)
dct = self.to_dict(omit_none=False)
self.validate(dct)
def get_materialization(self):
return self.config.materialized
@@ -335,14 +344,14 @@ class ParsedSeedNode(ParsedNode):
@dataclass
class TestMetadata(JsonSchemaMixin, Replaceable):
namespace: Optional[str]
class TestMetadata(dbtClassMixin, Replaceable):
name: str
kwargs: Dict[str, Any]
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None
@dataclass
class HasTestMetadata(JsonSchemaMixin):
class HasTestMetadata(dbtClassMixin):
test_metadata: TestMetadata
@@ -394,7 +403,7 @@ class IntermediateSnapshotNode(ParsedNode):
@dataclass
class ParsedSnapshotNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Snapshot]})
config: SnapshotVariants
config: SnapshotConfig
@dataclass
@@ -443,8 +452,10 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
self.docs = patch.docs
self.arguments = patch.arguments
if flags.STRICT_MODE:
assert isinstance(self, JsonSchemaMixin)
self.to_dict(validate=True, omit_none=False)
# What does this actually validate?
assert isinstance(self, dbtClassMixin)
dct = self.to_dict(omit_none=False)
self.validate(dct)
def same_contents(self, other: Optional['ParsedMacro']) -> bool:
if other is None:
@@ -654,9 +665,9 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
type: ExposureType
owner: ExposureOwner
resource_type: NodeType = NodeType.Exposure
description: str = ''
maturity: Optional[MaturityType] = None
url: Optional[str] = None
description: Optional[str] = None
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
@@ -709,6 +720,18 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
)
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedDataTestNode,
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSchemaTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
]
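
As a rough illustration of the __pre_deserialize__ hook used by HasRelationMetadata above (not part of the diff), a stand-alone toy that backfills a missing optional key before the dataclass is built. ToyRelation is invented; it assumes a mashumaro release that provides this hook, as dbt's fork does.

from dataclasses import dataclass
from typing import Optional
from mashumaro import DataClassDictMixin

@dataclass
class ToyRelation(DataClassDictMixin):
    database: Optional[str]
    schema: str

    @classmethod
    def __pre_deserialize__(cls, data):
        # default 'database' to None when callers omit the key entirely
        if 'database' not in data:
            data['database'] = None
        return data

print(ToyRelation.from_dict({'schema': 'analytics'}))
# ToyRelation(database=None, schema='analytics')
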
ParsedResource = Union[
ParsedDocumentation,
ParsedMacro,

View File

@@ -8,8 +8,9 @@ from dbt.contracts.util import (
import dbt.helper_types # noqa:F401
from dbt.exceptions import CompilationException
from hologram import JsonSchemaMixin
from hologram.helpers import StrEnum, ExtensibleJsonSchemaMixin
from dbt.dataclass_schema import (
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin
)
from dataclasses import dataclass, field
from datetime import timedelta
@@ -18,7 +19,7 @@ from typing import Optional, List, Union, Dict, Any, Sequence
@dataclass
class UnparsedBaseNode(JsonSchemaMixin, Replaceable):
class UnparsedBaseNode(dbtClassMixin, Replaceable):
package_name: str
root_path: str
path: str
@@ -66,12 +67,12 @@ class UnparsedRunHook(UnparsedNode):
@dataclass
class Docs(JsonSchemaMixin, Replaceable):
class Docs(dbtClassMixin, Replaceable):
show: bool = True
@dataclass
class HasDocs(AdditionalPropertiesMixin, ExtensibleJsonSchemaMixin,
class HasDocs(AdditionalPropertiesMixin, ExtensibleDbtClassMixin,
Replaceable):
name: str
description: str = ''
@@ -100,7 +101,7 @@ class UnparsedColumn(HasTests):
@dataclass
class HasColumnDocs(JsonSchemaMixin, Replaceable):
class HasColumnDocs(dbtClassMixin, Replaceable):
columns: Sequence[HasDocs] = field(default_factory=list)
@@ -110,7 +111,7 @@ class HasColumnTests(HasColumnDocs):
@dataclass
class HasYamlMetadata(JsonSchemaMixin):
class HasYamlMetadata(dbtClassMixin):
original_file_path: str
yaml_key: str
package_name: str
@@ -127,7 +128,7 @@ class UnparsedNodeUpdate(HasColumnTests, HasTests, HasYamlMetadata):
@dataclass
class MacroArgument(JsonSchemaMixin):
class MacroArgument(dbtClassMixin):
name: str
type: Optional[str] = None
description: str = ''
@@ -148,7 +149,7 @@ class TimePeriod(StrEnum):
@dataclass
class Time(JsonSchemaMixin, Replaceable):
class Time(dbtClassMixin, Replaceable):
count: int
period: TimePeriod
@@ -158,19 +159,14 @@ class Time(JsonSchemaMixin, Replaceable):
return actual_age > difference
class FreshnessStatus(StrEnum):
Pass = 'pass'
Warn = 'warn'
Error = 'error'
@dataclass
class FreshnessThreshold(JsonSchemaMixin, Mergeable):
class FreshnessThreshold(dbtClassMixin, Mergeable):
warn_after: Optional[Time] = None
error_after: Optional[Time] = None
filter: Optional[str] = None
def status(self, age: float) -> FreshnessStatus:
def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus":
from dbt.contracts.results import FreshnessStatus
if self.error_after and self.error_after.exceeded(age):
return FreshnessStatus.Error
elif self.warn_after and self.warn_after.exceeded(age):
@@ -185,7 +181,7 @@ class FreshnessThreshold(JsonSchemaMixin, Mergeable):
@dataclass
class AdditionalPropertiesAllowed(
AdditionalPropertiesMixin,
ExtensibleJsonSchemaMixin
ExtensibleDbtClassMixin
):
_extra: Dict[str, Any] = field(default_factory=dict)
@@ -217,7 +213,7 @@ class ExternalTable(AdditionalPropertiesAllowed, Mergeable):
@dataclass
class Quoting(JsonSchemaMixin, Mergeable):
class Quoting(dbtClassMixin, Mergeable):
database: Optional[bool] = None
schema: Optional[bool] = None
identifier: Optional[bool] = None
@@ -235,15 +231,15 @@ class UnparsedSourceTableDefinition(HasColumnTests, HasTests):
external: Optional[ExternalTable] = None
tags: List[str] = field(default_factory=list)
def to_dict(self, omit_none=True, validate=False):
result = super().to_dict(omit_none=omit_none, validate=validate)
if omit_none and self.freshness is None:
result['freshness'] = None
return result
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if 'freshness' not in dct and self.freshness is None:
dct['freshness'] = None
return dct
@dataclass
class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable):
class UnparsedSourceDefinition(dbtClassMixin, Replaceable):
name: str
description: str = ''
meta: Dict[str, Any] = field(default_factory=dict)
@@ -262,15 +258,15 @@ class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable):
def yaml_key(self) -> 'str':
return 'sources'
def to_dict(self, omit_none=True, validate=False):
result = super().to_dict(omit_none=omit_none, validate=validate)
if omit_none and self.freshness is None:
result['freshness'] = None
return result
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if 'freshness' not in dct and self.freshness is None:
dct['freshness'] = None
return dct
@dataclass
class SourceTablePatch(JsonSchemaMixin):
class SourceTablePatch(dbtClassMixin):
name: str
description: Optional[str] = None
meta: Optional[Dict[str, Any]] = None
@@ -301,7 +297,7 @@ class SourceTablePatch(JsonSchemaMixin):
@dataclass
class SourcePatch(JsonSchemaMixin, Replaceable):
class SourcePatch(dbtClassMixin, Replaceable):
name: str = field(
metadata=dict(description='The name of the source to override'),
)
@@ -345,7 +341,7 @@ class SourcePatch(JsonSchemaMixin, Replaceable):
@dataclass
class UnparsedDocumentation(JsonSchemaMixin, Replaceable):
class UnparsedDocumentation(dbtClassMixin, Replaceable):
package_name: str
root_path: str
path: str
@@ -405,17 +401,17 @@ class MaturityType(StrEnum):
@dataclass
class ExposureOwner(JsonSchemaMixin, Replaceable):
class ExposureOwner(dbtClassMixin, Replaceable):
email: str
name: Optional[str] = None
@dataclass
class UnparsedExposure(JsonSchemaMixin, Replaceable):
class UnparsedExposure(dbtClassMixin, Replaceable):
name: str
type: ExposureType
owner: ExposureOwner
description: str = ''
maturity: Optional[MaturityType] = None
url: Optional[str] = None
description: Optional[str] = None
depends_on: List[str] = field(default_factory=list)
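
The __post_serialize__ overrides above replace the old to_dict overrides: even with omit_none, an explicit null is kept for freshness. A toy sketch of the same idea (ToySource is invented; assumes a mashumaro version exposing TO_DICT_ADD_OMIT_NONE_FLAG and the __post_serialize__ hook, as used in the new dataclass_schema module later in this diff):

from dataclasses import dataclass
from typing import Optional
from mashumaro import DataClassDictMixin
from mashumaro.config import BaseConfig, TO_DICT_ADD_OMIT_NONE_FLAG

@dataclass
class ToySource(DataClassDictMixin):
    name: str
    freshness: Optional[str] = None

    class Config(BaseConfig):
        code_generation_options = [TO_DICT_ADD_OMIT_NONE_FLAG]

    def __post_serialize__(self, dct):
        # keep an explicit null so consumers can tell "no freshness configured"
        if 'freshness' not in dct and self.freshness is None:
            dct['freshness'] = None
        return dct

print(ToySource(name='events').to_dict(omit_none=True))
# {'name': 'events', 'freshness': None}
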

View File

@@ -4,25 +4,39 @@ from dbt.helper_types import NoValue
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt import tracking
from dbt import ui
from hologram import JsonSchemaMixin, ValidationError
from hologram.helpers import HyphenatedJsonSchemaMixin, register_pattern, \
ExtensibleJsonSchemaMixin
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError,
HyphenatedDbtClassMixin,
ExtensibleDbtClassMixin,
register_pattern, ValidatedStringMixin
)
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Union, Any, NewType
from typing import Optional, List, Dict, Union, Any
from mashumaro.types import SerializableType
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
DEFAULT_SEND_ANONYMOUS_USAGE_STATS = True
Name = NewType('Name', str)
class Name(ValidatedStringMixin):
ValidationRegex = r'^[^\d\W]\w*$'
register_pattern(Name, r'^[^\d\W]\w*$')
class SemverString(str, SerializableType):
def _serialize(self) -> str:
return self
@classmethod
def _deserialize(cls, value: str) -> 'SemverString':
return SemverString(value)
# this does not support full semver (it does not allow a trailing -fooXYZ) and
# is not restrictive enough for full semver either (it allows '1.0'). It's
# 'semver lite'.
SemverString = NewType('SemverString', str)
register_pattern(
SemverString,
r'^(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)(\.(?:0|[1-9]\d*))?$',
@@ -30,15 +44,15 @@ register_pattern(
@dataclass
class Quoting(JsonSchemaMixin, Mergeable):
identifier: Optional[bool]
schema: Optional[bool]
database: Optional[bool]
project: Optional[bool]
class Quoting(dbtClassMixin, Mergeable):
schema: Optional[bool] = None
database: Optional[bool] = None
project: Optional[bool] = None
identifier: Optional[bool] = None
@dataclass
class Package(Replaceable, HyphenatedJsonSchemaMixin):
class Package(Replaceable, HyphenatedDbtClassMixin):
pass
@@ -54,7 +68,7 @@ RawVersion = Union[str, float]
@dataclass
class GitPackage(Package):
git: str
revision: Optional[RawVersion]
revision: Optional[RawVersion] = None
warn_unpinned: Optional[bool] = None
def get_revisions(self) -> List[str]:
@@ -80,7 +94,7 @@ PackageSpec = Union[LocalPackage, GitPackage, RegistryPackage]
@dataclass
class PackageConfig(JsonSchemaMixin, Replaceable):
class PackageConfig(dbtClassMixin, Replaceable):
packages: List[PackageSpec]
@@ -96,13 +110,13 @@ class ProjectPackageMetadata:
@dataclass
class Downloads(ExtensibleJsonSchemaMixin, Replaceable):
class Downloads(ExtensibleDbtClassMixin, Replaceable):
tarball: str
@dataclass
class RegistryPackageMetadata(
ExtensibleJsonSchemaMixin,
ExtensibleDbtClassMixin,
ProjectPackageMetadata,
):
downloads: Downloads
@@ -142,6 +156,7 @@ BANNED_PROJECT_NAMES = {
'sql',
'sql_now',
'store_result',
'store_raw_result',
'target',
'this',
'tojson',
@@ -153,7 +168,7 @@ BANNED_PROJECT_NAMES = {
@dataclass
class Project(HyphenatedJsonSchemaMixin, Replaceable):
class Project(HyphenatedDbtClassMixin, Replaceable):
name: Name
version: Union[SemverString, float]
config_version: int
@@ -180,6 +195,7 @@ class Project(HyphenatedJsonSchemaMixin, Replaceable):
snapshots: Dict[str, Any] = field(default_factory=dict)
analyses: Dict[str, Any] = field(default_factory=dict)
sources: Dict[str, Any] = field(default_factory=dict)
tests: Dict[str, Any] = field(default_factory=dict)
vars: Optional[Dict[str, Any]] = field(
default=None,
metadata=dict(
@@ -190,18 +206,16 @@ class Project(HyphenatedJsonSchemaMixin, Replaceable):
query_comment: Optional[Union[QueryComment, NoValue, str]] = NoValue()
@classmethod
def from_dict(cls, data, validate=True) -> 'Project':
result = super().from_dict(data, validate=validate)
if result.name in BANNED_PROJECT_NAMES:
def validate(cls, data):
super().validate(data)
if data['name'] in BANNED_PROJECT_NAMES:
raise ValidationError(
f'Invalid project name: {result.name} is a reserved word'
f"Invalid project name: {data['name']} is a reserved word"
)
return result
@dataclass
class UserConfig(ExtensibleJsonSchemaMixin, Replaceable, UserConfigContract):
class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
send_anonymous_usage_stats: bool = DEFAULT_SEND_ANONYMOUS_USAGE_STATS
use_colors: Optional[bool] = None
partial_parse: Optional[bool] = None
@@ -221,7 +235,7 @@ class UserConfig(ExtensibleJsonSchemaMixin, Replaceable, UserConfigContract):
@dataclass
class ProfileConfig(HyphenatedJsonSchemaMixin, Replaceable):
class ProfileConfig(HyphenatedDbtClassMixin, Replaceable):
profile_name: str = field(metadata={'preserve_underscore': True})
target_name: str = field(metadata={'preserve_underscore': True})
config: UserConfig
@@ -232,10 +246,10 @@ class ProfileConfig(HyphenatedJsonSchemaMixin, Replaceable):
@dataclass
class ConfiguredQuoting(Quoting, Replaceable):
identifier: bool
schema: bool
database: Optional[bool]
project: Optional[bool]
identifier: bool = True
schema: bool = True
database: Optional[bool] = None
project: Optional[bool] = None
@dataclass
@@ -248,5 +262,5 @@ class Configuration(Project, ProfileConfig):
@dataclass
class ProjectList(JsonSchemaMixin):
class ProjectList(dbtClassMixin):
projects: Dict[str, Project]
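
The Name NewType plus register_pattern pair becomes a ValidatedStringMixin subclass, so project names are now checked at deserialization time. A quick stdlib-only demo of what the regex above accepts and rejects (the sample names are made up):

import re

NAME_REGEX = r'^[^\d\W]\w*$'  # same pattern as Name.ValidationRegex above

for candidate in ('my_project', 'jaffle_shop', '1st_project', 'bad-name'):
    ok = re.match(NAME_REGEX, candidate) is not None
    print(f'{candidate}: {"valid" if ok else "invalid"}')
# my_project: valid
# jaffle_shop: valid
# 1st_project: invalid
# bad-name: invalid
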

View File

@@ -1,12 +1,11 @@
from collections.abc import Mapping
from dataclasses import dataclass, fields
from typing import (
Optional, TypeVar, Generic, Dict,
Optional, Dict,
)
from typing_extensions import Protocol
from hologram import JsonSchemaMixin
from hologram.helpers import StrEnum
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt import deprecations
from dbt.contracts.util import Replaceable
@@ -32,7 +31,7 @@ class HasQuoting(Protocol):
quoting: Dict[str, bool]
class FakeAPIObject(JsonSchemaMixin, Replaceable, Mapping):
class FakeAPIObject(dbtClassMixin, Replaceable, Mapping):
# override the mapping truthiness, len is always >1
def __bool__(self):
return True
@@ -53,21 +52,18 @@ class FakeAPIObject(JsonSchemaMixin, Replaceable, Mapping):
return len(fields(self.__class__))
def incorporate(self, **kwargs):
value = self.to_dict()
value = self.to_dict(omit_none=True)
value = deep_merge(value, kwargs)
return self.from_dict(value)
T = TypeVar('T')
@dataclass
class _ComponentObject(FakeAPIObject, Generic[T]):
database: T
schema: T
identifier: T
class Policy(FakeAPIObject):
database: bool = True
schema: bool = True
identifier: bool = True
def get_part(self, key: ComponentName) -> T:
def get_part(self, key: ComponentName) -> bool:
if key == ComponentName.Database:
return self.database
elif key == ComponentName.Schema:
@@ -80,25 +76,18 @@ class _ComponentObject(FakeAPIObject, Generic[T]):
.format(key, list(ComponentName))
)
def replace_dict(self, dct: Dict[ComponentName, T]):
kwargs: Dict[str, T] = {}
def replace_dict(self, dct: Dict[ComponentName, bool]):
kwargs: Dict[str, bool] = {}
for k, v in dct.items():
kwargs[str(k)] = v
return self.replace(**kwargs)
@dataclass
class Policy(_ComponentObject[bool]):
database: bool = True
schema: bool = True
identifier: bool = True
@dataclass
class Path(_ComponentObject[Optional[str]]):
database: Optional[str]
schema: Optional[str]
identifier: Optional[str]
class Path(FakeAPIObject):
database: Optional[str] = None
schema: Optional[str] = None
identifier: Optional[str] = None
def __post_init__(self):
# handle pesky jinja2.Undefined sneaking in here and messing up rendering
@@ -120,3 +109,22 @@ class Path(_ComponentObject[Optional[str]]):
if part is not None:
part = part.lower()
return part
def get_part(self, key: ComponentName) -> Optional[str]:
if key == ComponentName.Database:
return self.database
elif key == ComponentName.Schema:
return self.schema
elif key == ComponentName.Identifier:
return self.identifier
else:
raise ValueError(
'Got a key of {}, expected one of {}'
.format(key, list(ComponentName))
)
def replace_dict(self, dct: Dict[ComponentName, str]):
kwargs: Dict[str, str] = {}
for k, v in dct.items():
kwargs[str(k)] = v
return self.replace(**kwargs)

View File

@@ -1,12 +1,11 @@
from dbt.contracts.graph.manifest import CompileResultNode
from dbt.contracts.graph.unparsed import (
FreshnessStatus, FreshnessThreshold
FreshnessThreshold
)
from dbt.contracts.graph.parsed import ParsedSourceDefinition
from dbt.contracts.util import (
BaseArtifactMetadata,
ArtifactMixin,
Writable,
VersionedSchema,
Replaceable,
schema_version,
@@ -18,18 +17,21 @@ from dbt.logger import (
GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from hologram.helpers import StrEnum
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin, StrEnum
import agate
from dataclasses import dataclass, field
from datetime import datetime
from typing import Union, Dict, List, Optional, Any, NamedTuple, Sequence
from typing import (
Union, Dict, List, Optional, Any, NamedTuple, Sequence,
)
from dbt.clients.system import write_json
@dataclass
class TimingInfo(JsonSchemaMixin):
class TimingInfo(dbtClassMixin):
name: str
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
@@ -55,50 +57,73 @@ class collect_timing_info:
logger.debug('finished collecting timing info')
class NodeStatus(StrEnum):
Success = "success"
Error = "error"
Fail = "fail"
Warn = "warn"
Skipped = "skipped"
Pass = "pass"
RuntimeErr = "runtime error"
class RunStatus(StrEnum):
Success = NodeStatus.Success
Error = NodeStatus.Error
Skipped = NodeStatus.Skipped
class TestStatus(StrEnum):
Pass = NodeStatus.Pass
Error = NodeStatus.Error
Fail = NodeStatus.Fail
Warn = NodeStatus.Warn
class FreshnessStatus(StrEnum):
Pass = NodeStatus.Pass
Warn = NodeStatus.Warn
Error = NodeStatus.Error
RuntimeErr = NodeStatus.RuntimeErr
@dataclass
class BaseResult(JsonSchemaMixin):
class BaseResult(dbtClassMixin):
status: Union[RunStatus, TestStatus, FreshnessStatus]
timing: List[TimingInfo]
thread_id: str
execution_time: float
adapter_response: Dict[str, Any]
message: Optional[Union[str, int]]
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
if 'message' not in data:
data['message'] = None
return data
@dataclass
class NodeResult(BaseResult):
node: CompileResultNode
error: Optional[str] = None
status: Union[None, str, int, bool] = None
execution_time: Union[str, int] = 0
thread_id: Optional[str] = None
timing: List[TimingInfo] = field(default_factory=list)
fail: Optional[bool] = None
warn: Optional[bool] = None
@dataclass
class PartialResult(BaseResult, Writable):
pass
# if the result got to the point where it could be skipped/failed, we would
# be returning a real result, not a partial.
@property
def skipped(self):
return False
@dataclass
class WritableRunModelResult(BaseResult, Writable):
skip: bool = False
class RunResult(NodeResult):
agate_table: Optional[agate.Table] = field(
default=None, metadata={
'serialize': lambda x: None, 'deserialize': lambda x: None
}
)
@property
def skipped(self):
return self.skip
return self.status == RunStatus.Skipped
@dataclass
class RunModelResult(WritableRunModelResult):
agate_table: Optional[agate.Table] = None
def to_dict(self, *args, **kwargs):
dct = super().to_dict(*args, **kwargs)
dct.pop('agate_table', None)
return dct
@dataclass
class ExecutionResult(JsonSchemaMixin):
class ExecutionResult(dbtClassMixin):
results: Sequence[BaseResult]
elapsed_time: float
@@ -112,9 +137,6 @@ class ExecutionResult(JsonSchemaMixin):
return self.results[idx]
RunResult = Union[PartialResult, WritableRunModelResult]
@dataclass
class RunResultsMetadata(BaseArtifactMetadata):
dbt_schema_version: str = field(
@@ -123,33 +145,69 @@ class RunResultsMetadata(BaseArtifactMetadata):
@dataclass
@schema_version('run-results', 1)
class RunResultsArtifact(
class RunResultOutput(BaseResult):
unique_id: str
def process_run_result(result: RunResult) -> RunResultOutput:
return RunResultOutput(
unique_id=result.node.unique_id,
status=result.status,
timing=result.timing,
thread_id=result.thread_id,
execution_time=result.execution_time,
message=result.message,
adapter_response=result.adapter_response
)
@dataclass
class RunExecutionResult(
ExecutionResult,
ArtifactMixin,
):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
results=self.results,
elapsed_time=self.elapsed_time,
generated_at=self.generated_at,
args=self.args,
)
writable.write(path)
@dataclass
@schema_version('run-results', 1)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@classmethod
def from_node_results(
def from_execution_results(
cls,
results: Sequence[RunResult],
elapsed_time: float,
generated_at: datetime,
args: Dict,
):
processed_results = [process_run_result(result) for result in results]
meta = RunResultsMetadata(
dbt_schema_version=str(cls.dbt_schema_version),
generated_at=generated_at,
)
return cls(
metadata=meta,
results=results,
results=processed_results,
elapsed_time=elapsed_time,
args=args
)
def write(self, path: str):
write_json(path, self.to_dict(omit_none=False))
@dataclass
class RunOperationResult(ExecutionResult):
@@ -174,7 +232,7 @@ class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):
elapsed_time: float,
generated_at: datetime,
):
meta = RunResultsMetadata(
meta = RunOperationResultMetadata(
dbt_schema_version=str(cls.dbt_schema_version),
generated_at=generated_at,
)
@@ -185,59 +243,56 @@ class RunOperationResultsArtifact(RunOperationResult, ArtifactMixin):
success=success,
)
# due to issues with typing.Union collapsing subclasses, this can't subclass
# PartialResult
@dataclass
class SourceFreshnessResultMixin(JsonSchemaMixin):
class SourceFreshnessResult(NodeResult):
node: ParsedSourceDefinition
status: FreshnessStatus
max_loaded_at: datetime
snapshotted_at: datetime
age: float
# due to issues with typing.Union collapsing subclasses, this can't subclass
# PartialResult
@dataclass
class SourceFreshnessResult(BaseResult, Writable, SourceFreshnessResultMixin):
node: ParsedSourceDefinition
status: FreshnessStatus = FreshnessStatus.Pass
def __post_init__(self):
self.fail = self.status == 'error'
@property
def warned(self):
return self.status == 'warn'
@property
def skipped(self):
return False
def _copykeys(src, keys, **updates):
return {k: getattr(src, k) for k in keys}
class FreshnessErrorEnum(StrEnum):
runtime_error = 'runtime error'
@dataclass
class SourceFreshnessRuntimeError(JsonSchemaMixin):
class SourceFreshnessRuntimeError(dbtClassMixin):
unique_id: str
error: str
state: FreshnessErrorEnum
error: Optional[Union[str, int]]
status: FreshnessErrorEnum
@dataclass
class SourceFreshnessOutput(JsonSchemaMixin):
class SourceFreshnessOutput(dbtClassMixin):
unique_id: str
max_loaded_at: datetime
snapshotted_at: datetime
max_loaded_at_time_ago_in_s: float
state: FreshnessStatus
status: FreshnessStatus
criteria: FreshnessThreshold
adapter_response: Dict[str, Any]
FreshnessNodeResult = Union[PartialResult, SourceFreshnessResult]
@dataclass
class PartialSourceFreshnessResult(NodeResult):
status: FreshnessStatus
@property
def skipped(self):
return False
FreshnessNodeResult = Union[PartialSourceFreshnessResult,
SourceFreshnessResult]
FreshnessNodeOutput = Union[SourceFreshnessRuntimeError, SourceFreshnessOutput]
@@ -245,11 +300,11 @@ def process_freshness_result(
result: FreshnessNodeResult
) -> FreshnessNodeOutput:
unique_id = result.node.unique_id
if result.error is not None:
if result.status == FreshnessStatus.RuntimeErr:
return SourceFreshnessRuntimeError(
unique_id=unique_id,
error=result.error,
state=FreshnessErrorEnum.runtime_error,
error=result.message,
status=FreshnessErrorEnum.runtime_error,
)
# we know that this must be a SourceFreshnessResult
@@ -271,8 +326,9 @@ def process_freshness_result(
max_loaded_at=result.max_loaded_at,
snapshotted_at=result.snapshotted_at,
max_loaded_at_time_ago_in_s=result.age,
state=result.status,
status=result.status,
criteria=criteria,
adapter_response=result.adapter_response
)
@@ -330,40 +386,40 @@ CatalogKey = NamedTuple(
@dataclass
class StatsItem(JsonSchemaMixin):
class StatsItem(dbtClassMixin):
id: str
label: str
value: Primitive
description: Optional[str]
include: bool
description: Optional[str] = None
StatsDict = Dict[str, StatsItem]
@dataclass
class ColumnMetadata(JsonSchemaMixin):
class ColumnMetadata(dbtClassMixin):
type: str
comment: Optional[str]
index: int
name: str
comment: Optional[str] = None
ColumnMap = Dict[str, ColumnMetadata]
@dataclass
class TableMetadata(JsonSchemaMixin):
class TableMetadata(dbtClassMixin):
type: str
database: Optional[str]
schema: str
name: str
comment: Optional[str]
owner: Optional[str]
database: Optional[str] = None
comment: Optional[str] = None
owner: Optional[str] = None
@dataclass
class CatalogTable(JsonSchemaMixin, Replaceable):
class CatalogTable(dbtClassMixin, Replaceable):
metadata: TableMetadata
columns: ColumnMap
stats: StatsDict
@@ -386,12 +442,18 @@ class CatalogMetadata(BaseArtifactMetadata):
@dataclass
class CatalogResults(JsonSchemaMixin):
class CatalogResults(dbtClassMixin):
nodes: Dict[str, CatalogTable]
sources: Dict[str, CatalogTable]
errors: Optional[List[str]]
errors: Optional[List[str]] = None
_compile_results: Optional[Any] = None
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if '_compile_results' in dct:
del dct['_compile_results']
return dct
@dataclass
@schema_version('catalog', 1)

View File

@@ -5,13 +5,12 @@ from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional, Union, List, Any, Dict, Type, Sequence
from hologram import JsonSchemaMixin
from hologram.helpers import StrEnum
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import (
TimingInfo,
RunResult, RunResultsArtifact, TimingInfo,
CatalogArtifact,
CatalogResults,
ExecutionResult,
@@ -19,8 +18,7 @@ from dbt.contracts.results import (
FreshnessResult,
RunOperationResult,
RunOperationResultsArtifact,
RunResult,
RunResultsArtifact,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.exceptions import InternalException
@@ -35,16 +33,25 @@ TaskID = uuid.UUID
@dataclass
class RPCParameters(JsonSchemaMixin):
timeout: Optional[float]
class RPCParameters(dbtClassMixin):
task_tags: TaskTags
timeout: Optional[float]
@classmethod
def __pre_deserialize__(cls, data, omit_none=True):
data = super().__pre_deserialize__(data)
if 'timeout' not in data:
data['timeout'] = None
if 'task_tags' not in data:
data['task_tags'] = None
return data
@dataclass
class RPCExecParameters(RPCParameters):
name: str
sql: str
macros: Optional[str]
macros: Optional[str] = None
@dataclass
@@ -80,6 +87,7 @@ class RPCTestParameters(RPCCompileParameters):
data: bool = False
schema: bool = False
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
@@ -132,7 +140,7 @@ class StatusParameters(RPCParameters):
@dataclass
class GCSettings(JsonSchemaMixin):
class GCSettings(dbtClassMixin):
# start evicting the longest-ago-ended tasks here
maxsize: int
# start evicting all tasks before now - auto_reap_age when we have this
@@ -225,12 +233,12 @@ class RemoteCompileResult(RemoteCompileResultMixin):
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
args: Dict[str, Any] = field(default_factory=dict)
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_node_results(
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
@@ -241,11 +249,11 @@ class RemoteExecutionResult(ExecutionResult, RemoteResult):
@classmethod
def from_local_result(
cls,
base: RunResultsArtifact,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.metadata.generated_at,
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
@@ -254,7 +262,7 @@ class RemoteExecutionResult(ExecutionResult, RemoteResult):
@dataclass
class ResultTable(JsonSchemaMixin):
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@@ -411,21 +419,31 @@ class TaskHandlerState(StrEnum):
@dataclass
class TaskTiming(JsonSchemaMixin):
class TaskTiming(dbtClassMixin):
state: TaskHandlerState
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
class TaskRow(TaskTiming):
task_id: TaskID
request_id: Union[str, int]
request_source: str
method: str
timeout: Optional[float]
tags: TaskTags
request_id: Union[str, int]
tags: TaskTags = None
timeout: Optional[float] = None
@dataclass
@@ -451,7 +469,7 @@ class KillResult(RemoteResult):
@dataclass
@schema_version('remote-manifest-result', 1)
class GetManifestResult(RemoteResult):
manifest: Optional[WritableManifest]
manifest: Optional[WritableManifest] = None
# this is kind of carefully structured: BlocksManifestTasks is implied by
@@ -475,6 +493,16 @@ class PollResult(RemoteResult, TaskTiming):
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
@schema_version('poll-remote-deps-result', 1)

View File

@@ -1,18 +1,18 @@
from dataclasses import dataclass
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin
from typing import List, Dict, Any, Union
@dataclass
class SelectorDefinition(JsonSchemaMixin):
class SelectorDefinition(dbtClassMixin):
name: str
definition: Union[str, Dict[str, Any]]
description: str = ''
@dataclass
class SelectorFile(JsonSchemaMixin):
class SelectorFile(dbtClassMixin):
selectors: List[SelectorDefinition]
version: int = 2

View File

@@ -7,13 +7,12 @@ from typing import (
from dbt.clients.system import write_json, read_json
from dbt.exceptions import (
IncompatibleSchemaException,
InternalException,
RuntimeException,
)
from dbt.version import __version__
from dbt.tracking import get_invocation_id
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin
MacroKey = Tuple[str, str]
SourceKey = Tuple[str, str]
@@ -57,8 +56,10 @@ class Mergeable(Replaceable):
class Writable:
def write(self, path: str, omit_none: bool = False):
write_json(path, self.to_dict(omit_none=omit_none)) # type: ignore
def write(self, path: str):
write_json(
path, self.to_dict(omit_none=False) # type: ignore
)
class AdditionalPropertiesMixin:
@@ -69,22 +70,41 @@ class AdditionalPropertiesMixin:
"""
ADDITIONAL_PROPERTIES = True
# This takes attributes in the dictionary that are
# not in the class definitions and puts them in an
# _extra dict in the class
@classmethod
def from_dict(cls, data, validate=True):
self = super().from_dict(data=data, validate=validate)
keys = self.to_dict(validate=False, omit_none=False)
def __pre_deserialize__(cls, data):
# dir() did not work because fields with
# metadata settings are not found
# The original version of this would create the
# object first and then update extra with the
# extra keys, but that won't work here, so
# we copy the dict to avoid inserting '_extra'
# into the original data. This also requires
# that Mashumaro actually build the '_extra' field
cls_keys = cls._get_field_names()
new_dict = {}
for key, value in data.items():
if key not in keys:
self.extra[key] = value
return self
if key not in cls_keys and key != '_extra':
if '_extra' not in new_dict:
new_dict['_extra'] = {}
new_dict['_extra'][key] = value
else:
new_dict[key] = value
data = new_dict
data = super().__pre_deserialize__(data)
return data
def to_dict(self, omit_none=True, validate=False):
data = super().to_dict(omit_none=omit_none, validate=validate)
def __post_serialize__(self, dct):
data = super().__post_serialize__(dct)
data.update(self.extra)
if '_extra' in data:
del data['_extra']
return data
def replace(self, **kwargs):
dct = self.to_dict(omit_none=False, validate=False)
dct = self.to_dict(omit_none=False)
dct.update(kwargs)
return self.from_dict(dct)
@@ -106,7 +126,8 @@ class Readable:
return cls.from_dict(data) # type: ignore
BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/dbt/{name}/v{version}.json'
BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
SCHEMA_PATH = 'dbt/{name}/v{version}.json'
@dataclasses.dataclass
@@ -114,12 +135,16 @@ class SchemaVersion:
name: str
version: int
def __str__(self) -> str:
return BASE_SCHEMAS_URL.format(
@property
def path(self) -> str:
return SCHEMA_PATH.format(
name=self.name,
version=self.version,
version=self.version
)
def __str__(self) -> str:
return BASE_SCHEMAS_URL + self.path
SCHEMA_VERSION_KEY = 'dbt_schema_version'
@@ -135,7 +160,7 @@ def get_metadata_env() -> Dict[str, str]:
@dataclasses.dataclass
class BaseArtifactMetadata(JsonSchemaMixin):
class BaseArtifactMetadata(dbtClassMixin):
dbt_schema_version: str
dbt_version: str = __version__
generated_at: datetime = dataclasses.field(
@@ -158,7 +183,7 @@ def schema_version(name: str, version: int):
@dataclasses.dataclass
class VersionedSchema(JsonSchemaMixin):
class VersionedSchema(dbtClassMixin):
dbt_schema_version: ClassVar[SchemaVersion]
@classmethod
@@ -180,18 +205,9 @@ class ArtifactMixin(VersionedSchema, Writable, Readable):
metadata: BaseArtifactMetadata
@classmethod
def from_dict(
cls: Type[T], data: Dict[str, Any], validate: bool = True
) -> T:
def validate(cls, data):
super().validate(data)
if cls.dbt_schema_version is None:
raise InternalException(
'Cannot call from_dict with no schema version!'
)
if validate:
expected = str(cls.dbt_schema_version)
found = data.get('metadata', {}).get(SCHEMA_VERSION_KEY)
if found != expected:
raise IncompatibleSchemaException(expected, found)
return super().from_dict(data=data, validate=validate)
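
The artifact schema URL is now assembled from a base URL plus a per-artifact path. A small stdlib-only sketch of the SchemaVersion change above, using the 'run-results' v1 artifact shown elsewhere in this diff:

import dataclasses

BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
SCHEMA_PATH = 'dbt/{name}/v{version}.json'

@dataclasses.dataclass
class SchemaVersion:
    name: str
    version: int

    @property
    def path(self) -> str:
        return SCHEMA_PATH.format(name=self.name, version=self.version)

    def __str__(self) -> str:
        return BASE_SCHEMAS_URL + self.path

print(str(SchemaVersion('run-results', 1)))
# https://schemas.getdbt.com/dbt/run-results/v1.json
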

View File

@@ -0,0 +1,169 @@
from typing import (
Type, ClassVar, cast,
)
import re
from dataclasses import fields
from enum import Enum
from datetime import datetime
from dateutil.parser import parse
from hologram import JsonSchemaMixin, FieldEncoder, ValidationError  # type: ignore
from mashumaro import DataClassDictMixin
from mashumaro.config import (
TO_DICT_ADD_OMIT_NONE_FLAG, BaseConfig as MashBaseConfig
)
from mashumaro.types import SerializableType, SerializationStrategy
class DateTimeSerialization(SerializationStrategy):
def serialize(self, value):
out = value.isoformat()
# Assume UTC if timezone is missing
if value.tzinfo is None:
out = out + "Z"
return out
def deserialize(self, value):
return (
value if isinstance(value, datetime) else parse(cast(str, value))
)
# This class pulls in both JsonSchemaMixin from Hologram and
# DataClassDictMixin from our fork of Mashumaro. The 'to_dict'
# and 'from_dict' methods come from Mashumaro. Building
# jsonschemas for every class and the 'validate' method
# come from Hologram.
class dbtClassMixin(DataClassDictMixin, JsonSchemaMixin):
"""Mixin which adds methods to generate a JSON schema and
convert to and from JSON encodable dicts with validation
against the schema
"""
class Config(MashBaseConfig):
code_generation_options = [
TO_DICT_ADD_OMIT_NONE_FLAG,
]
serialization_strategy = {
datetime: DateTimeSerialization(),
}
_hyphenated: ClassVar[bool] = False
ADDITIONAL_PROPERTIES: ClassVar[bool] = False
# This is called by the mashumaro to_dict in order to handle
# nested classes.
# Munges the dict that's returned.
def __post_serialize__(self, dct):
if self._hyphenated:
new_dict = {}
for key in dct:
if '_' in key:
new_key = key.replace('_', '-')
new_dict[new_key] = dct[key]
else:
new_dict[key] = dct[key]
dct = new_dict
return dct
# This is called by the mashumaro _from_dict method, before
# performing the conversion from a dict
@classmethod
def __pre_deserialize__(cls, data):
# `data` might not be a dict, e.g. for `query_comment`, which accepts
# a dict or a string; only snake-case for dict values.
if cls._hyphenated and isinstance(data, dict):
new_dict = {}
for key in data:
if '-' in key:
new_key = key.replace('-', '_')
new_dict[new_key] = data[key]
else:
new_dict[key] = data[key]
data = new_dict
return data
# This is used in the hologram._encode_field method, which calls
# a 'to_dict' method which does not have the same parameters in
# hologram and in mashumaro.
def _local_to_dict(self, **kwargs):
args = {}
if 'omit_none' in kwargs:
args['omit_none'] = kwargs['omit_none']
return self.to_dict(**args)
class ValidatedStringMixin(str, SerializableType):
ValidationRegex = ''
@classmethod
def _deserialize(cls, value: str) -> 'ValidatedStringMixin':
cls.validate(value)
return ValidatedStringMixin(value)
def _serialize(self) -> str:
return str(self)
@classmethod
def validate(cls, value):
res = re.match(cls.ValidationRegex, value)
if res is None:
raise ValidationError(f"Invalid value: {value}") # TODO
# These classes must be in this order or it doesn't work
class StrEnum(str, SerializableType, Enum):
def __str__(self):
return self.value
# https://docs.python.org/3.6/library/enum.html#using-automatic-values
def _generate_next_value_(name, *_):
return name
def _serialize(self) -> str:
return self.value
@classmethod
def _deserialize(cls, value: str):
return cls(value)
class HyphenatedDbtClassMixin(dbtClassMixin):
# used by from_dict/to_dict
_hyphenated: ClassVar[bool] = True
# used by jsonschema validation, _get_fields
@classmethod
def field_mapping(cls):
result = {}
for field in fields(cls):
skip = field.metadata.get("preserve_underscore")
if skip:
continue
if "_" in field.name:
result[field.name] = field.name.replace("_", "-")
return result
class ExtensibleDbtClassMixin(dbtClassMixin):
ADDITIONAL_PROPERTIES = True
# This is used by Hologram in jsonschema validation
def register_pattern(base_type: Type, pattern: str) -> None:
"""base_type should be a typing.NewType that should always have the given
regex pattern. That means that its underlying type ('__supertype__') had
better be a str!
"""
class PatternEncoder(FieldEncoder):
@property
def json_schema(self):
return {"type": "string", "pattern": pattern}
dbtClassMixin.register_field_encoders({base_type: PatternEncoder()})
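
To make the new dataclass_schema module above concrete, a rough self-contained use of the DateTimeSerialization strategy with plain mashumaro. ToyMetadata is invented; this assumes mashumaro and python-dateutil are installed, not dbt's actual classes.

from dataclasses import dataclass
from datetime import datetime
from dateutil.parser import parse
from mashumaro import DataClassDictMixin
from mashumaro.config import BaseConfig
from mashumaro.types import SerializationStrategy

class DateTimeSerialization(SerializationStrategy):
    def serialize(self, value):
        out = value.isoformat()
        # assume UTC if the timezone is missing, as the strategy above does
        if value.tzinfo is None:
            out = out + "Z"
        return out

    def deserialize(self, value):
        return value if isinstance(value, datetime) else parse(value)

@dataclass
class ToyMetadata(DataClassDictMixin):
    generated_at: datetime

    class Config(BaseConfig):
        serialization_strategy = {datetime: DateTimeSerialization()}

print(ToyMetadata(generated_at=datetime(2021, 4, 27, 12, 0)).to_dict())
# {'generated_at': '2021-04-27T12:00:00Z'}
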

View File

@@ -48,7 +48,18 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
return self.revision
def nice_version_name(self):
return 'revision {}'.format(self.revision)
if self.revision == 'HEAD':
return 'HEAD (default revision)'
else:
return 'revision {}'.format(self.revision)
def unpinned_msg(self):
if self.revision == 'HEAD':
return 'not pinned, using HEAD (default branch)'
elif self.revision in ('main', 'master'):
return f'pinned to the "{self.revision}" branch'
else:
return None
def _checkout(self):
"""Performs a shallow clone of the repository into the downloads
@@ -57,7 +68,7 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
the path to the checked out directory."""
try:
dir_ = git.clone_and_checkout(
self.git, get_downloads_path(), branch=self.revision,
self.git, get_downloads_path(), revision=self.revision,
dirname=self._checkout_name
)
except ExecutableError as exc:
@@ -72,11 +83,12 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
def _fetch_metadata(self, project, renderer) -> ProjectPackageMetadata:
path = self._checkout()
if self.revision == 'master' and self.warn_unpinned:
if self.unpinned_msg() and self.warn_unpinned:
warn_or_error(
'The git package "{}" is not pinned.\n\tThis can introduce '
'The git package "{}" \n\tis {}.\n\tThis can introduce '
'breaking changes into your project without warning!\n\nSee {}'
.format(self.git, PIN_PACKAGE_URL),
.format(self.git, self.unpinned_msg(), PIN_PACKAGE_URL),
log_fmt=ui.yellow('WARNING: {}')
)
loaded = Project.from_project_root(path, renderer)
@@ -133,7 +145,7 @@ class GitUnpinnedPackage(GitPackageMixin, UnpinnedPackage[GitPinnedPackage]):
def resolved(self) -> GitPinnedPackage:
requested = set(self.revisions)
if len(requested) == 0:
requested = {'master'}
requested = {'HEAD'}
elif len(requested) > 1:
raise_dependency_error(
'git dependencies should contain exactly one version. '

View File

@@ -7,14 +7,14 @@ from dbt.node_types import NodeType
from dbt import flags
from dbt.ui import line_wrap_message
import hologram
import dbt.dataclass_schema
def validator_error_message(exc):
"""Given a hologram.ValidationError (which is basically a
"""Given a dbt.dataclass_schema.ValidationError (which is basically a
jsonschema.ValidationError), return the relevant parts as a string
"""
if not isinstance(exc, hologram.ValidationError):
if not isinstance(exc, dbt.dataclass_schema.ValidationError):
return str(exc)
path = "[%s]" % "][".join(map(repr, exc.relative_path))
return 'at path {}: {}'.format(path, exc.message)

View File

@@ -1,5 +1,8 @@
import os
import multiprocessing
if os.name != 'nt':
# https://bugs.python.org/issue41567
import multiprocessing.popen_spawn_posix # type: ignore
from pathlib import Path
from typing import Optional

View File

@@ -1,6 +1,6 @@
# special support for CLI argument parsing.
import itertools
import yaml
from dbt.clients.yaml_helper import yaml, Loader, Dumper # noqa: F401
from typing import (
Dict, List, Optional, Tuple, Any, Union
@@ -236,7 +236,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
)
# if key isn't a valid method name, this will raise
base = SelectionCriteria.from_dict(definition, dct)
base = SelectionCriteria.selection_criteria_from_dict(definition, dct)
if diff_arg is None:
return base
else:

View File

@@ -25,8 +25,8 @@ def get_package_names(nodes):
def alert_non_existence(raw_spec, nodes):
if len(nodes) == 0:
warn_or_error(
f"The selector '{str(raw_spec)}' does not match any nodes and will"
f" be ignored"
f"The selection criterion '{str(raw_spec)}' does not match"
f" any nodes"
)

View File

@@ -3,7 +3,7 @@ from itertools import chain
from pathlib import Path
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional
from hologram.helpers import StrEnum
from dbt.dataclass_schema import StrEnum
from .graph import UniqueId

View File

@@ -102,7 +102,9 @@ class SelectionCriteria:
return method_name, method_arguments
@classmethod
def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria':
def selection_criteria_from_dict(
cls, raw: Any, dct: Dict[str, Any]
) -> 'SelectionCriteria':
if 'value' not in dct:
raise RuntimeException(
f'Invalid node spec "{raw}" - no search value!'
@@ -150,7 +152,7 @@ class SelectionCriteria:
# bad spec!
raise RuntimeException(f'Invalid selector spec "{raw}"')
return cls.from_dict(raw, result.groupdict())
return cls.selection_criteria_from_dict(raw, result.groupdict())
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):

View File

@@ -2,14 +2,27 @@
from dataclasses import dataclass
from datetime import timedelta
from pathlib import Path
from typing import NewType, Tuple, AbstractSet
from typing import Tuple, AbstractSet, Union
from hologram import (
FieldEncoder, JsonSchemaMixin, JsonDict, ValidationError
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError, StrEnum,
)
from hologram.helpers import StrEnum
from hologram import FieldEncoder, JsonDict
from mashumaro.types import SerializableType
Port = NewType('Port', int)
class Port(int, SerializableType):
@classmethod
def _deserialize(cls, value: Union[int, str]) -> 'Port':
try:
value = int(value)
except ValueError:
raise ValidationError(f'Cannot encode {value} into port number')
return Port(value)
def _serialize(self) -> int:
return self
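
A hedged sketch (not from the diff) of why Port becomes a SerializableType: callers may pass the port as a string, and _deserialize coerces it to an int subclass before the dataclass is built. ToyCredentials is invented; assumes mashumaro is installed.

from dataclasses import dataclass
from mashumaro import DataClassDictMixin
from mashumaro.types import SerializableType

class Port(int, SerializableType):
    @classmethod
    def _deserialize(cls, value):
        return cls(int(value))

    def _serialize(self) -> int:
        return int(self)

@dataclass
class ToyCredentials(DataClassDictMixin):
    host: str
    port: Port

creds = ToyCredentials.from_dict({'host': 'localhost', 'port': '5432'})
print(creds.port, type(creds.port).__name__)  # 5432 Port
print(creds.to_dict())  # {'host': 'localhost', 'port': 5432}
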
class PortEncoder(FieldEncoder):
@@ -66,12 +79,12 @@ class NVEnum(StrEnum):
@dataclass
class NoValue(JsonSchemaMixin):
class NoValue(dbtClassMixin):
"""Sometimes, you want a way to say none that isn't None"""
novalue: NVEnum = NVEnum.novalue
JsonSchemaMixin.register_field_encoders({
dbtClassMixin.register_field_encoders({
Port: PortEncoder(),
timedelta: TimeDeltaFieldEncoder(),
Path: PathEncoder(),

View File

@@ -1,4 +1,4 @@
from hologram.helpers import StrEnum
from dbt.dataclass_schema import StrEnum
import json
from typing import Union, Dict, Any

View File

@@ -7,15 +7,15 @@
{{ write(sql) }}
{%- endif -%}
{%- set status, res = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}
{%- set res, table = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}
{%- if name is not none -%}
{{ store_result(name, status=status, agate_table=res) }}
{{ store_result(name, response=res, agate_table=table) }}
{%- endif -%}
{%- endif -%}
{%- endmacro %}
{% macro noop_statement(name=None, status=None, res=None) -%}
{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}
{%- set sql = caller() -%}
{%- if name == 'main' -%}
@@ -24,7 +24,7 @@
{%- endif -%}
{%- if name is not none -%}
{{ store_result(name, status=status, agate_table=res) }}
{{ store_raw_result(name, message=message, code=code, rows_affected=rows_affected, agate_table=res) }}
{%- endif -%}
{%- endmacro %}

View File

@@ -18,6 +18,7 @@
{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}
{%- set predicates = [] if predicates is none else [] + predicates -%}
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
{%- set update_columns = config.get('merge_update_columns', default = dest_columns | map(attribute="quoted") | list) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{% if unique_key %}
@@ -37,8 +38,8 @@
{% if unique_key %}
when matched then update set
{% for column in dest_columns -%}
{{ adapter.quote(column.name) }} = DBT_INTERNAL_SOURCE.{{ adapter.quote(column.name) }}
{% for column_name in update_columns -%}
{{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}
{%- if not loop.last %}, {%- endif %}
{%- endfor %}
{% endif %}

View File

@@ -112,7 +112,7 @@
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{%- set agate_table = load_agate_table() -%}
{%- do store_result('agate_table', status='OK', agate_table=agate_table) -%}
{%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
@@ -129,11 +129,11 @@
{% set create_table_sql = create_csv_table(model, agate_table) %}
{% endif %}
{% set status = 'CREATE' if full_refresh_mode else 'INSERT' %}
{% set num_rows = (agate_table.rows | length) %}
{% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}
{% set rows_affected = (agate_table.rows | length) %}
{% set sql = load_csv_rows(model, agate_table) %}
{% call noop_statement('main', status ~ ' ' ~ num_rows) %}
{% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}
{{ create_table_sql }};
-- dbt seed --
{{ sql }}

View File

@@ -13,13 +13,7 @@
when matched
and DBT_INTERNAL_DEST.dbt_valid_to is null
and DBT_INTERNAL_SOURCE.dbt_change_type = 'update'
then update
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
when matched
and DBT_INTERNAL_DEST.dbt_valid_to is null
and DBT_INTERNAL_SOURCE.dbt_change_type = 'delete'
and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')
then update
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to

View File

@@ -164,7 +164,11 @@
{%- for col in check_cols -%}
{{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}
or
({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null)
(
(({{ snapshotted_rel }}.{{ col }} is null) and not ({{ current_rel }}.{{ col }} is null))
or
((not {{ snapshotted_rel }}.{{ col }} is null) and ({{ current_rel }}.{{ col }} is null))
)
{%- if not loop.last %} or {% endif -%}
{%- endfor -%}
{%- endif -%}

View File

@@ -0,0 +1,10 @@
{%- materialization test, default -%}
{% call statement('main', fetch_result=True) -%}
select count(*) as validation_errors
from (
{{ sql }}
) _dbt_internal_test
{%- endcall %}
{%- endmaterialization -%}

View File

@@ -36,8 +36,7 @@ from validation_errors
{% endmacro %}
{% macro test_accepted_values(model, values) %}
{% test accepted_values(model, values) %}
{% set macro = adapter.dispatch('test_accepted_values') %}
{{ macro(model, values, **kwargs) }}
{% endmacro %}
{% endtest %}

View File

@@ -9,9 +9,7 @@ where {{ column_name }} is null
{% endmacro %}
{% macro test_not_null(model) %}
{% test not_null(model) %}
{% set macro = adapter.dispatch('test_not_null') %}
{{ macro(model, **kwargs) }}
{% endmacro %}
{% endtest %}

View File

@@ -18,7 +18,7 @@ where child.id is not null
{% macro test_relationships(model, to, field) %}
{% test relationships(model, to, field) %}
{% set macro = adapter.dispatch('test_relationships') %}
{{ macro(model, to, field, **kwargs) }}
{% endmacro %}
{% endtest %}

View File

@@ -19,7 +19,7 @@ from (
{% endmacro %}
{% macro test_unique(model) %}
{% test unique(model) %}
{% set macro = adapter.dispatch('test_unique') %}
{{ macro(model, **kwargs) }}
{% endmacro %}
{% endtest %}

File diff suppressed because one or more lines are too long

View File

@@ -13,7 +13,7 @@ from typing import Optional, List, ContextManager, Callable, Dict, Any, Set
import colorama
import logbook
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin
# Colorama needs some help on windows because we're using logger.info
# instead of print(). If the Windows env doesn't have a TERM var set,
@@ -45,11 +45,10 @@ DEBUG_LOG_FORMAT = (
ExceptionInformation = str
Extras = Dict[str, Any]
@dataclass
class LogMessage(JsonSchemaMixin):
class LogMessage(dbtClassMixin):
timestamp: datetime
message: str
channel: str
@@ -57,7 +56,7 @@ class LogMessage(JsonSchemaMixin):
levelname: str
thread_name: str
process: int
extra: Optional[Extras] = None
extra: Optional[Dict[str, Any]] = None
exc_info: Optional[ExceptionInformation] = None
@classmethod
@@ -96,7 +95,8 @@ class JsonFormatter(LogMessageFormatter):
# utils imports exceptions which imports logger...
import dbt.utils
log_message = super().__call__(record, handler)
return json.dumps(log_message.to_dict(), cls=dbt.utils.JSONEncoder)
dct = log_message.to_dict(omit_none=True)
return json.dumps(dct, cls=dbt.utils.JSONEncoder)
class FormatterMixin:
@@ -128,6 +128,7 @@ class OutputHandler(logbook.StreamHandler, FormatterMixin):
The `format_string` parameter only changes the default text output, not
debug mode or json.
"""
def __init__(
self,
stream,
@@ -215,13 +216,14 @@ class TextOnly(logbook.Processor):
class TimingProcessor(logbook.Processor):
def __init__(self, timing_info: Optional[JsonSchemaMixin] = None):
def __init__(self, timing_info: Optional[dbtClassMixin] = None):
self.timing_info = timing_info
super().__init__()
def process(self, record):
if self.timing_info is not None:
record.extra['timing_info'] = self.timing_info.to_dict()
record.extra['timing_info'] = self.timing_info.to_dict(
omit_none=True)
class DbtProcessState(logbook.Processor):
@@ -350,6 +352,7 @@ def make_log_dir_if_missing(log_dir):
class DebugWarnings(logbook.compat.redirected_warnings):
"""Log warnings, except send them to 'debug' instead of 'warning' level.
"""
def make_record(self, message, exception, filename, lineno):
rv = super().make_record(message, exception, filename, lineno)
rv.level = logbook.DEBUG

View File

@@ -446,6 +446,21 @@ def _build_snapshot_subparser(subparsers, base_subparser):
return sub
def _add_defer_argument(*subparsers):
for sub in subparsers:
sub.add_optional_argument_inverse(
'--defer',
enable_help='''
If set, defer to the state variable for resolving unselected nodes.
''',
disable_help='''
If set, do not defer to the state variable for resolving unselected
nodes.
''',
default=flags.DEFER_MODE,
)
def _build_run_subparser(subparsers, base_subparser):
run_sub = subparsers.add_parser(
'run',
@@ -463,19 +478,6 @@ def _build_run_subparser(subparsers, base_subparser):
'''
)
# this is a "dbt run"-only thing, for now
run_sub.add_optional_argument_inverse(
'--defer',
enable_help='''
If set, defer to the state variable for resolving unselected nodes.
''',
disable_help='''
If set, do not defer to the state variable for resolving unselected
nodes.
''',
default=flags.DEFER_MODE,
)
run_sub.set_defaults(cls=run_task.RunTask, which='run', rpc_method='run')
return run_sub
@@ -1033,6 +1035,8 @@ def parse_args(args, cls=DBTArgumentParser):
# list_sub sets up its own arguments.
_add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub)
_add_selection_arguments(snapshot_sub, seed_sub, models_name='select')
# --defer
_add_defer_argument(run_sub, test_sub)
# --full-refresh
_add_table_mutability_arguments(run_sub, compile_sub)

View File

@@ -1,6 +1,6 @@
from typing import List
from hologram.helpers import StrEnum
from dbt.dataclass_schema import StrEnum
class NodeType(StrEnum):
@@ -46,6 +46,7 @@ class NodeType(StrEnum):
cls.Source,
cls.Macro,
cls.Analysis,
cls.Exposure
]
def pluralize(self) -> str:

View File

@@ -5,12 +5,11 @@ from .docs import DocumentationParser # noqa
from .hooks import HookParser # noqa
from .macros import MacroParser # noqa
from .models import ModelParser # noqa
from .results import ParseResult # noqa
from .schemas import SchemaParser # noqa
from .seeds import SeedParser # noqa
from .snapshots import SnapshotParser # noqa
from . import ( # noqa
analysis, base, data_test, docs, hooks, macros, models, results, schemas,
analysis, base, data_test, docs, hooks, macros, models, schemas,
snapshots
)

View File

@@ -3,17 +3,14 @@ import os
from dbt.contracts.graph.parsed import ParsedAnalysisNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock
class AnalysisParser(SimpleSQLParser[ParsedAnalysisNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.analysis_paths, '.sql'
)
def parse_from_dict(self, dct, validate=True) -> ParsedAnalysisNode:
return ParsedAnalysisNode.from_dict(dct, validate=validate)
if validate:
ParsedAnalysisNode.validate(dct)
return ParsedAnalysisNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:

View File

@@ -2,14 +2,13 @@ import abc
import itertools
import os
from typing import (
List, Dict, Any, Iterable, Generic, TypeVar
List, Dict, Any, Generic, TypeVar
)
from hologram import ValidationError
from dbt.dataclass_schema import ValidationError
from dbt import utils
from dbt.clients.jinja import MacroGenerator
from dbt.clients.system import load_file_contents
from dbt.context.providers import (
generate_parser_model,
generate_generate_component_name_macro,
@@ -20,18 +19,14 @@ from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import (
ContextConfig
)
from dbt.contracts.files import (
SourceFile, FilePath, FileHash
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.parsed import HasUniqueID
from dbt.contracts.graph.parsed import HasUniqueID, ManifestNodes
from dbt.contracts.graph.unparsed import UnparsedNode
from dbt.exceptions import (
CompilationException, validator_error_message, InternalException
)
from dbt import hooks
from dbt.node_types import NodeType
from dbt.parser.results import ParseResult, ManifestNodes
from dbt.parser.search import FileBlock
# internally, the parser may store a less-restrictive type that will be
@@ -48,20 +43,9 @@ ConfiguredBlockType = TypeVar('ConfiguredBlockType', bound=FileBlock)
class BaseParser(Generic[FinalValue]):
def __init__(self, results: ParseResult, project: Project) -> None:
self.results = results
def __init__(self, project: Project, manifest: Manifest) -> None:
self.project = project
# this should be a superset of [x.path for x in self.results.files]
# because we fill it via search()
self.searched: List[FilePath] = []
@abc.abstractmethod
def get_paths(self) -> Iterable[FilePath]:
pass
def search(self) -> List[FilePath]:
self.searched = list(self.get_paths())
return self.searched
self.manifest = manifest
@abc.abstractmethod
def parse_file(self, block: FileBlock) -> None:
@@ -77,38 +61,22 @@ class BaseParser(Generic[FinalValue]):
self.project.project_name,
resource_name)
def load_file(
self,
path: FilePath,
*,
set_contents: bool = True,
) -> SourceFile:
file_contents = load_file_contents(path.absolute_path, strip=False)
checksum = FileHash.from_contents(file_contents)
source_file = SourceFile(path=path, checksum=checksum)
if set_contents:
source_file.contents = file_contents.strip()
else:
source_file.contents = ''
return source_file
class Parser(BaseParser[FinalValue], Generic[FinalValue]):
def __init__(
self,
results: ParseResult,
project: Project,
manifest: Manifest,
root_project: RuntimeConfig,
macro_manifest: Manifest,
) -> None:
super().__init__(results, project)
super().__init__(project, manifest)
self.root_project = root_project
self.macro_manifest = macro_manifest
class RelationUpdate:
def __init__(
self, config: RuntimeConfig, manifest: Manifest, component: str
self, config: RuntimeConfig, manifest: Manifest,
component: str
) -> None:
macro = manifest.find_generate_macro_by_name(
component=component,
@@ -141,21 +109,23 @@ class ConfiguredParser(
):
def __init__(
self,
results: ParseResult,
project: Project,
manifest: Manifest,
root_project: RuntimeConfig,
macro_manifest: Manifest,
) -> None:
super().__init__(results, project, root_project, macro_manifest)
super().__init__(project, manifest, root_project)
self._update_node_database = RelationUpdate(
manifest=macro_manifest, config=root_project, component='database'
manifest=manifest, config=root_project,
component='database'
)
self._update_node_schema = RelationUpdate(
manifest=macro_manifest, config=root_project, component='schema'
manifest=manifest, config=root_project,
component='schema'
)
self._update_node_alias = RelationUpdate(
manifest=macro_manifest, config=root_project, component='alias'
manifest=manifest, config=root_project,
component='alias'
)
@abc.abstractclassmethod
@@ -248,11 +218,11 @@ class ConfiguredParser(
'raw_sql': block.contents,
'unique_id': self.generate_unique_id(name),
'config': self.config_dict(config),
'checksum': block.file.checksum.to_dict(),
'checksum': block.file.checksum.to_dict(omit_none=True),
}
dct.update(kwargs)
try:
return self.parse_from_dict(dct)
return self.parse_from_dict(dct, validate=True)
except ValidationError as exc:
msg = validator_error_message(exc)
# this is a bit silly, but build an UnparsedNode just for error
@@ -269,31 +239,35 @@ class ConfiguredParser(
self, parsed_node: IntermediateNode, config: ContextConfig
) -> Dict[str, Any]:
return generate_parser_model(
parsed_node, self.root_project, self.macro_manifest, config
parsed_node, self.root_project, self.manifest, config
)
def render_with_context(
self, parsed_node: IntermediateNode, config: ContextConfig
) -> None:
"""Given the parsed node and a ContextConfig to use during parsing,
render the node's sql with macro capture enabled.
# Given the parsed node and a ContextConfig to use during parsing,
# render the node's sql with macro capture enabled.
# Note: this mutates the config object when config calls are rendered.
Note: this mutates the config object when config() calls are rendered.
"""
# during parsing, we don't have a connection, but we might need one, so
# we have to acquire it.
with get_adapter(self.root_project).connection_for(parsed_node):
context = self._context_for(parsed_node, config)
# this goes through the process of rendering, but just throws away
# the rendered result. The "macro capture" is the point?
get_rendered(
parsed_node.raw_sql, context, parsed_node, capture_macros=True
)
# This is taking the original config for the node, converting it to a dict,
# updating the config with new config passed in, then re-creating the
# config from the dict in the node.
def update_parsed_node_config(
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
) -> None:
# Overwrite node config
final_config_dict = parsed_node.config.to_dict()
final_config_dict = parsed_node.config.to_dict(omit_none=True)
final_config_dict.update(config_dict)
# re-mangle hooks, in case we got new ones
self._mangle_hooks(final_config_dict)
@@ -378,9 +352,9 @@ class ConfiguredParser(
def add_result_node(self, block: FileBlock, node: ManifestNodes):
if node.config.enabled:
self.results.add_node(block.file, node)
self.manifest.add_node(block.file, node)
else:
self.results.add_disabled(block.file, node)
self.manifest.add_disabled(block.file, node)
def parse_node(self, block: ConfiguredBlockType) -> FinalNode:
compiled_path: str = self.get_compiled_path(block)

View File
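As a reading aid, a simplified sketch of the constructor chain after this change: BaseParser takes the project and the in-progress manifest, Parser adds the root project, and ConfiguredParser builds its RelationUpdate helpers from that same manifest rather than a separate macro_manifest. Names mirror the diff; the bodies are stand-ins, not dbt internals:

class BaseParserSketch:
    def __init__(self, project, manifest):
        self.project = project
        self.manifest = manifest  # the in-progress Manifest replaces the old ParseResult

class ParserSketch(BaseParserSketch):
    def __init__(self, project, manifest, root_project):
        super().__init__(project, manifest)
        self.root_project = root_project

class RelationUpdateSketch:
    def __init__(self, config, manifest, component):
        # The real class looks up the generate_<component>_name macro in the
        # manifest; here we only record which component it would update.
        self.component = component

class ConfiguredParserSketch(ParserSketch):
    def __init__(self, project, manifest, root_project):
        super().__init__(project, manifest, root_project)
        # All three helpers now read macros from the same manifest.
        self._update_node_database = RelationUpdateSketch(root_project, manifest, "database")
        self._update_node_schema = RelationUpdateSketch(root_project, manifest, "schema")
        self._update_node_alias = RelationUpdateSketch(root_project, manifest, "alias")

parser = ConfiguredParserSketch(project="my_project", manifest={}, root_project="root")
print(parser._update_node_alias.component)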

@@ -1,18 +1,15 @@
from dbt.contracts.graph.parsed import ParsedDataTestNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock
from dbt.utils import get_pseudo_test_path
class DataTestParser(SimpleSQLParser[ParsedDataTestNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.test_paths, '.sql'
)
def parse_from_dict(self, dct, validate=True) -> ParsedDataTestNode:
return ParsedDataTestNode.from_dict(dct, validate=validate)
if validate:
ParsedDataTestNode.validate(dct)
return ParsedDataTestNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:

View File

@@ -7,7 +7,7 @@ from dbt.contracts.graph.parsed import ParsedDocumentation
from dbt.node_types import NodeType
from dbt.parser.base import Parser
from dbt.parser.search import (
BlockContents, FileBlock, FilesystemSearcher, BlockSearcher
BlockContents, FileBlock, BlockSearcher
)
@@ -15,13 +15,6 @@ SHOULD_PARSE_RE = re.compile(r'{[{%]')
class DocumentationParser(Parser[ParsedDocumentation]):
def get_paths(self):
return FilesystemSearcher(
project=self.project,
relative_dirs=self.project.docs_paths,
extension='.md',
)
@property
def resource_type(self) -> NodeType:
return NodeType.Documentation
@@ -60,6 +53,4 @@ class DocumentationParser(Parser[ParsedDocumentation]):
)
for block in searcher:
for parsed in self.parse_block(block):
self.results.add_doc(file_block.file, parsed)
# mark the file as seen, even if there are no macros in it
self.results.get_file(file_block.file)
self.manifest.add_doc(file_block.file, parsed)

View File

@@ -70,16 +70,19 @@ class HookParser(SimpleParser[HookBlock, ParsedHookNode]):
def transform(self, node):
return node
def get_paths(self) -> List[FilePath]:
# Hooks are only in the dbt_project.yml file for the project
def get_path(self) -> FilePath:
path = FilePath(
project_root=self.project.project_root,
searched_path='.',
relative_path='dbt_project.yml',
)
return [path]
return path
def parse_from_dict(self, dct, validate=True) -> ParsedHookNode:
return ParsedHookNode.from_dict(dct, validate=validate)
if validate:
ParsedHookNode.validate(dct)
return ParsedHookNode.from_dict(dct)
@classmethod
def get_compiled_path(cls, block: HookBlock):

View File
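A small self-contained sketch of the single-path lookup HookParser.get_path now performs, using a plain dataclass as a stand-in for dbt.contracts.files.FilePath:

import os
from dataclasses import dataclass

@dataclass
class FilePathSketch:
    # Stand-in for dbt.contracts.files.FilePath.
    project_root: str
    searched_path: str
    relative_path: str

    @property
    def absolute_path(self) -> str:
        return os.path.join(self.project_root, self.searched_path, self.relative_path)

def get_path(project_root: str) -> FilePathSketch:
    # Hooks live only in the project file, so there is exactly one path to return.
    return FilePathSketch(project_root=project_root, searched_path=".", relative_path="dbt_project.yml")

print(get_path("/tmp/my_project").absolute_path)  # /tmp/my_project/./dbt_project.yml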

@@ -5,6 +5,7 @@ import jinja2
from dbt.clients import jinja
from dbt.contracts.graph.unparsed import UnparsedMacro
from dbt.contracts.graph.parsed import ParsedMacro
from dbt.contracts.files import FilePath
from dbt.exceptions import CompilationException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
@@ -14,12 +15,14 @@ from dbt.utils import MACRO_PREFIX
class MacroParser(BaseParser[ParsedMacro]):
def get_paths(self):
return FilesystemSearcher(
# This is only used when creating a MacroManifest separate
# from the normal parsing flow.
def get_paths(self) -> List[FilePath]:
return list(FilesystemSearcher(
project=self.project,
relative_dirs=self.project.macro_paths,
extension='.sql',
)
))
@property
def resource_type(self) -> NodeType:
@@ -53,7 +56,7 @@ class MacroParser(BaseParser[ParsedMacro]):
t for t in
jinja.extract_toplevel_blocks(
base_node.raw_sql,
allowed_blocks={'macro', 'materialization'},
allowed_blocks={'macro', 'materialization', 'test'},
collect_raw_data=False,
)
if isinstance(t, jinja.BlockTag)
@@ -90,7 +93,7 @@ class MacroParser(BaseParser[ParsedMacro]):
def parse_file(self, block: FileBlock):
# mark the file as seen, even if there are no macros in it
self.results.get_file(block.file)
self.manifest.get_file(block.file)
source_file = block.file
original_file_path = source_file.path.original_file_path
@@ -108,4 +111,4 @@ class MacroParser(BaseParser[ParsedMacro]):
)
for node in self.parse_unparsed_macros(base_node):
self.results.add_macro(block.file, node)
self.manifest.add_macro(block.file, node)

View File
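The functional change here is that 'test' joins 'macro' and 'materialization' in allowed_blocks, so {% test ... %} blocks defined in macro files are recognized. A rough standalone illustration of what "top-level blocks" means, with a regex standing in for jinja.extract_toplevel_blocks (the real implementation is a proper block scanner, not a regex):

import re

MACRO_FILE = """
{% macro cents_to_dollars(col) %} {{ col }} / 100 {% endmacro %}

{% test not_empty(model, column_name) %}
select * from {{ model }} where {{ column_name }} = ''
{% endtest %}
"""

# Simplified stand-in: find the opening tag of each allowed top-level block.
ALLOWED_BLOCKS = {"macro", "materialization", "test"}
block_pattern = re.compile(r"{%-?\s*(" + "|".join(sorted(ALLOWED_BLOCKS)) + r")\s+(\w+)")

for block_type, block_name in block_pattern.findall(MACRO_FILE):
    print(block_type, block_name)
# macro cents_to_dollars
# test not_empty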

@@ -1,29 +1,40 @@
from dataclasses import dataclass
from dataclasses import field
import os
import pickle
from typing import (
Dict, Optional, Mapping, Callable, Any, List, Type, Union, MutableMapping
Dict, Optional, Mapping, Callable, Any, List, Type, Union
)
import time
import dbt.exceptions
import dbt.tracking
import dbt.flags as flags
from dbt.adapters.factory import (
get_adapter,
get_relation_class_by_name,
get_adapter_package_names,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger, DbtProcessState
from dbt.node_types import NodeType
from dbt.clients.jinja import get_rendered
from dbt.clients.jinja import get_rendered, statically_extract_macro_calls
from dbt.clients.system import make_directory
from dbt.config import Project, RuntimeConfig
from dbt.context.docs import generate_runtime_docs
from dbt.contracts.files import FilePath, FileHash
from dbt.context.macro_resolver import MacroResolver
from dbt.context.base import generate_base_context
from dbt.contracts.files import FileHash, ParseFileType
from dbt.parser.read_files import read_files, load_source_file
from dbt.contracts.graph.compiled import ManifestNode
from dbt.contracts.graph.manifest import Manifest, Disabled
from dbt.contracts.graph.manifest import (
Manifest, Disabled, MacroManifest, ManifestStateCheck
)
from dbt.contracts.graph.parsed import (
ParsedSourceDefinition, ParsedNode, ParsedMacro, ColumnInfo, ParsedExposure
)
from dbt.contracts.util import Writable
from dbt.exceptions import (
ref_target_not_found,
get_target_not_found_or_disabled_msg,
@@ -38,7 +49,6 @@ from dbt.parser.docs import DocumentationParser
from dbt.parser.hooks import HookParser
from dbt.parser.macros import MacroParser
from dbt.parser.models import ModelParser
from dbt.parser.results import ParseResult
from dbt.parser.schemas import SchemaParser
from dbt.parser.search import FileBlock
from dbt.parser.seeds import SeedParser
@@ -47,60 +57,47 @@ from dbt.parser.sources import patch_sources
from dbt.ui import warning_tag
from dbt.version import __version__
from dbt.dataclass_schema import dbtClassMixin
PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle'
PARSING_STATE = DbtProcessState('parsing')
DEFAULT_PARTIAL_PARSE = False
_parser_types: List[Type[Parser]] = [
ModelParser,
SnapshotParser,
AnalysisParser,
DataTestParser,
HookParser,
SeedParser,
DocumentationParser,
SchemaParser,
]
# Part of saved performance info
@dataclass
class ParserInfo(dbtClassMixin):
parser: str
elapsed: float
path_count: int = 0
# TODO: this should be calculated per-file based on the vars() calls made in
# parsing, so changing one var doesn't invalidate everything. also there should
be something like that for env_var - currently changing env_vars in a way that
impacts graph selection or configs will result in weird test failures.
# finally, we should hash the actual profile used, not just root project +
# profiles.yml + relevant args. While sufficient, it is definitely overkill.
def make_parse_result(
config: RuntimeConfig, all_projects: Mapping[str, Project]
) -> ParseResult:
"""Make a ParseResult from the project configuration and the profile."""
# if any of these change, we need to reject the parser
vars_hash = FileHash.from_contents(
'\x00'.join([
getattr(config.args, 'vars', '{}') or '{}',
getattr(config.args, 'profile', '') or '',
getattr(config.args, 'target', '') or '',
__version__
])
)
profile_path = os.path.join(config.args.profiles_dir, 'profiles.yml')
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
project_hashes = {}
for name, project in all_projects.items():
path = os.path.join(project.project_root, 'dbt_project.yml')
with open(path) as fp:
project_hashes[name] = FileHash.from_contents(fp.read())
return ParseResult(
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
)
# Part of saved performance info
@dataclass
class ProjectLoaderInfo(dbtClassMixin):
project_name: str
elapsed: float
parsers: List[ParserInfo]
path_count: int = 0
# Part of saved performance info
@dataclass
class ManifestLoaderInfo(dbtClassMixin, Writable):
path_count: int = 0
is_partial_parse_enabled: Optional[bool] = None
read_files_elapsed: Optional[float] = None
load_macros_elapsed: Optional[float] = None
parse_project_elapsed: Optional[float] = None
patch_sources_elapsed: Optional[float] = None
process_manifest_elapsed: Optional[float] = None
load_all_elapsed: Optional[float] = None
projects: List[ProjectLoaderInfo] = field(default_factory=list)
# The ManifestLoader loads the manifest. The standard way to use the
# ManifestLoader is using the 'get_full_manifest' class method, but
# many tests use abbreviated processes.
class ManifestLoader:
def __init__(
self,
@@ -110,164 +107,269 @@ class ManifestLoader:
) -> None:
self.root_project: RuntimeConfig = root_project
self.all_projects: Mapping[str, Project] = all_projects
self.manifest: Manifest = Manifest({}, {}, {}, {}, {}, {}, [], {})
self.manifest.metadata = root_project.get_metadata()
# This is a MacroQueryStringSetter callable, which is called
# later after we set the MacroManifest in the adapter. It sets
# up the query headers.
self.macro_hook: Callable[[Manifest], Any]
if macro_hook is None:
self.macro_hook = lambda m: None
else:
self.macro_hook = macro_hook
self.results: ParseResult = make_parse_result(
root_project, all_projects,
self._perf_info = ManifestLoaderInfo(
is_partial_parse_enabled=self._partial_parse_enabled()
)
self._loaded_file_cache: Dict[str, FileBlock] = {}
partial_parse = self._partial_parse_enabled()
self._perf_info: Dict[str, Any] = {
'path_count': 0, 'projects': [],
'is_partial_parse_enabled': partial_parse}
# State check determines whether the old_manifest and the current
# manifest match well enough to do partial parsing
self.manifest.state_check = self.build_manifest_state_check()
# This is a saved manifest from a previous run that's used for partial parsing
self.old_manifest: Optional[Manifest] = self.read_saved_manifest()
# This is the method that builds a complete manifest. We sometimes
# use an abbreviated process in tests.
@classmethod
def get_full_manifest(
cls,
config: RuntimeConfig,
*,
reset: bool = False,
) -> Manifest:
adapter = get_adapter(config) # type: ignore
# reset is set in a TaskManager load_manifest call, since
# the config and adapter may be persistent.
if reset:
config.clear_dependencies()
adapter.clear_macro_manifest()
macro_hook = adapter.connections.set_query_header
with PARSING_STATE: # set up logbook.Processor for parsing
# Start performance counting
start_load_all = time.perf_counter()
projects = config.load_dependencies()
loader = ManifestLoader(config, projects, macro_hook)
loader.load()
# The goal is to move partial parse writing to after update_manifest
loader.write_manifest_for_partial_parse()
manifest = loader.update_manifest()
# Move write_manifest_for_partial_parse here
_check_manifest(manifest, config)
manifest.build_flat_graph()
# This needs to happen after loading from a partial parse,
# so that the adapter has the query headers from the macro_hook.
loader.save_macros_to_adapter(adapter)
# Save performance info
loader._perf_info.load_all_elapsed = (
time.perf_counter() - start_load_all
)
loader.track_project_load()
return manifest
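For orientation, the order of operations in get_full_manifest reduced to a runnable sketch; the stub methods are placeholders for the loader methods above, not dbt internals:

import time

class LoaderStub:
    # Minimal stand-in so the sketch runs; the real methods do the work
    # described in the comments below.
    def load(self): ...
    def write_manifest_for_partial_parse(self): ...
    def update_manifest(self): return {"nodes": {}, "macros": {}}

def get_full_manifest_sketch(loader):
    # Mirrors the orchestration in ManifestLoader.get_full_manifest:
    start = time.perf_counter()
    loader.load()                              # read files, parse macros, parse each project
    loader.write_manifest_for_partial_parse()  # pickle the manifest for the next run
    manifest = loader.update_manifest()        # patch sources/nodes, resolve refs/sources/docs
    elapsed = time.perf_counter() - start      # stored as load_all_elapsed in _perf_info
    return manifest, elapsed

print(get_full_manifest_sketch(LoaderStub()))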
# This is where the main action happens
def load(self):
if self.old_manifest is not None:
logger.debug('Got an acceptable saved parse result')
# Read files creates a dictionary of projects to a dictionary
# of parsers to lists of file strings. The file strings are
# used to get the SourceFiles from the manifest files.
# In the future the loaded files will be used to control
# partial parsing, but right now we're just moving the
# file loading out of the individual parsers and doing it
# all at once.
start_read_files = time.perf_counter()
project_parser_files = {}
for project in self.all_projects.values():
read_files(project, self.manifest.files, project_parser_files)
self._perf_info.read_files_elapsed = (time.perf_counter() - start_read_files)
# We need to parse the macros first, so they're resolvable when
# the other files are loaded
start_load_macros = time.perf_counter()
for project in self.all_projects.values():
parser = MacroParser(project, self.manifest)
parser_files = project_parser_files[project.project_name]
for search_key in parser_files['MacroParser']:
block = FileBlock(self.manifest.files[search_key])
self.parse_with_cache(block, parser)
self.reparse_macros()
# This is where a loop over self.manifest.macros should be performed
# to set the 'depends_on' information from static rendering.
self._perf_info.load_macros_elapsed = (time.perf_counter() - start_load_macros)
# Now that the macros are parsed, parse the rest of the files.
# This is currently done on a per project basis,
# but in the future we may change that
start_parse_projects = time.perf_counter()
for project in self.all_projects.values():
self.parse_project(project, project_parser_files[project.project_name])
self._perf_info.parse_project_elapsed = (time.perf_counter() - start_parse_projects)
# Parse every file in this project, except macros (already done)
def parse_project(
self,
project: Project,
parser_files
) -> None:
project_parser_info: List[ParserInfo] = []
start_timer = time.perf_counter()
total_path_count = 0
# Loop through parsers with loaded files. Note: SchemaParser must be last
parser_types: List[Type[Parser]] = [
ModelParser, SnapshotParser, AnalysisParser, DataTestParser,
SeedParser, DocumentationParser, SchemaParser]
for parser_cls in parser_types:
parser_name = parser_cls.__name__
# No point in creating a parser if we don't have files for it
if parser_name not in parser_files or not parser_files[parser_name]:
continue
# Initialize timing info
parser_path_count = 0
parser_start_timer = time.perf_counter()
# Parse the project files for this parser
parser: Parser = parser_cls(project, self.manifest, self.root_project)
for search_key in parser_files[parser_name]:
block = FileBlock(self.manifest.files[search_key])
self.parse_with_cache(block, parser)
parser_path_count = parser_path_count + 1
# Save timing info
project_parser_info.append(ParserInfo(
parser=parser.resource_type,
path_count=parser_path_count,
elapsed=time.perf_counter() - parser_start_timer
))
total_path_count = total_path_count + parser_path_count
# HookParser doesn't run from loaded files, just dbt_project.yml,
# so do separately
hook_parser = HookParser(project, self.manifest, self.root_project)
path = hook_parser.get_path()
file_block = FileBlock(load_source_file(path, ParseFileType.Hook, project.project_name))
self.parse_with_cache(file_block, hook_parser)
# Store the performance info
elapsed = time.perf_counter() - start_timer
project_info = ProjectLoaderInfo(
project_name=project.project_name,
path_count=total_path_count,
elapsed=elapsed,
parsers=project_parser_info
)
self._perf_info.projects.append(project_info)
self._perf_info.path_count = (
self._perf_info.path_count + total_path_count
)
# Loop through macros in the manifest and statically parse
# the 'macro_sql' to find depends_on.macros
def reparse_macros(self):
internal_package_names = get_adapter_package_names(
self.root_project.credentials.type
)
macro_resolver = MacroResolver(
self.manifest.macros,
self.root_project.project_name,
internal_package_names
)
base_ctx = generate_base_context({})
for macro in self.manifest.macros.values():
possible_macro_calls = statically_extract_macro_calls(macro.macro_sql, base_ctx)
for macro_name in possible_macro_calls:
# adapter.dispatch calls can generate a call with the same name as the macro
# it ought to be an adapter prefix (postgres_) or default_
if macro_name == macro.name:
continue
dep_macro_id = macro_resolver.get_macro_id(macro.package_name, macro_name)
if dep_macro_id:
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
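A rough standalone illustration of the dependency pass above: pull candidate macro calls out of each macro's SQL (a crude regex stands in for statically_extract_macro_calls), skip self-references, and record the resolved unique ids as depends_on entries. All names here are invented for the example:

import re

# macro name -> (package, macro_sql); contents are made up
macros = {
    "cents_to_dollars": ("my_project", "{% macro cents_to_dollars(col) %}{{ safe_divide(col, 100) }}{% endmacro %}"),
    "safe_divide": ("my_project", "{% macro safe_divide(a, b) %}case when {{ b }} = 0 then null else {{ a }} / {{ b }} end{% endmacro %}"),
}

# resolver: macro name -> unique id (stand-in for MacroResolver.get_macro_id)
macro_ids = {name: f"macro.{pkg}.{name}" for name, (pkg, _) in macros.items()}

call_pattern = re.compile(r"{{\s*(\w+)\s*\(")  # crude stand-in for static extraction

depends_on = {}
for name, (_, macro_sql) in macros.items():
    deps = []
    for called in call_pattern.findall(macro_sql):
        if called == name:  # a dispatch call can share the macro's own name
            continue
        dep_id = macro_ids.get(called)
        if dep_id:
            deps.append(dep_id)
    depends_on[f"macro.my_project.{name}"] = deps

print(depends_on)
# {'macro.my_project.cents_to_dollars': ['macro.my_project.safe_divide'],
#  'macro.my_project.safe_divide': []}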
# This is where we use the partial-parse state from the
# pickle file (if it exists)
def parse_with_cache(
self,
path: FilePath,
block: FileBlock,
parser: BaseParser,
old_results: Optional[ParseResult],
) -> None:
block = self._get_file(path, parser)
if not self._get_cached(block, old_results, parser):
# _get_cached actually copies the nodes, etc, that were
# generated from the file to the results, in 'sanitized_update'
if not self._get_cached(block, parser):
parser.parse_file(block)
# check if we have a stored parse file, then check if
# file checksums are the same or not and either return
# the old ... stuff or return false (not cached)
def _get_cached(
self,
block: FileBlock,
old_results: Optional[ParseResult],
parser: BaseParser,
) -> bool:
# TODO: handle multiple parsers w/ same files, by
# tracking parser type vs node type? Or tracking actual
# parser type during parsing?
if old_results is None:
if self.old_manifest is None:
return False
if old_results.has_file(block.file):
return self.results.sanitized_update(
block.file, old_results, parser.resource_type
# The 'has_file' method is where we check to see if
# the checksum of the old file is the same as the new
# file. If the checksum is different, 'has_file' returns
# false. If it's the same, the file and the things that
# were generated from it are used.
if self.old_manifest.has_file(block.file):
return self.manifest.sanitized_update(
block.file, self.old_manifest, parser.resource_type
)
return False
def _get_file(self, path: FilePath, parser: BaseParser) -> FileBlock:
if path.search_key in self._loaded_file_cache:
block = self._loaded_file_cache[path.search_key]
else:
block = FileBlock(file=parser.load_file(path))
self._loaded_file_cache[path.search_key] = block
return block
def parse_project(
self,
project: Project,
macro_manifest: Manifest,
old_results: Optional[ParseResult],
) -> None:
parsers: List[Parser] = []
for cls in _parser_types:
parser = cls(self.results, project, self.root_project,
macro_manifest)
parsers.append(parser)
# per-project cache.
self._loaded_file_cache.clear()
project_info: Dict[str, Any] = {'parsers': []}
start_timer = time.perf_counter()
total_path_count = 0
for parser in parsers:
parser_path_count = 0
parser_start_timer = time.perf_counter()
for path in parser.search():
self.parse_with_cache(path, parser, old_results)
parser_path_count = parser_path_count + 1
if parser_path_count % 100 == 0:
print("..", end='', flush=True)
if parser_path_count > 0:
parser_elapsed = time.perf_counter() - parser_start_timer
project_info['parsers'].append({'parser': type(
parser).__name__, 'path_count': parser_path_count,
'elapsed': '{:.2f}'.format(parser_elapsed)})
total_path_count = total_path_count + parser_path_count
if total_path_count > 100:
print("..")
elapsed = time.perf_counter() - start_timer
project_info['project_name'] = project.project_name
project_info['path_count'] = total_path_count
project_info['elapsed'] = '{:.2f}'.format(elapsed)
self._perf_info['projects'].append(project_info)
self._perf_info['path_count'] = self._perf_info['path_count'] + \
total_path_count
def load_only_macros(self) -> Manifest:
old_results = self.read_parse_results()
for project in self.all_projects.values():
parser = MacroParser(self.results, project)
for path in parser.search():
self.parse_with_cache(path, parser, old_results)
# make a manifest with just the macros to get the context
macro_manifest = Manifest.from_macros(
macros=self.results.macros,
files=self.results.files
)
self.macro_hook(macro_manifest)
return macro_manifest
def load(self, macro_manifest: Manifest):
old_results = self.read_parse_results()
if old_results is not None:
logger.debug('Got an acceptable cached parse result')
self.results.macros.update(macro_manifest.macros)
self.results.files.update(macro_manifest.files)
start_timer = time.perf_counter()
for project in self.all_projects.values():
# parse a single project
self.parse_project(project, macro_manifest, old_results)
self._perf_info['parse_project_elapsed'] = '{:.2f}'.format(
time.perf_counter() - start_timer)
def write_parse_results(self):
def write_manifest_for_partial_parse(self):
path = os.path.join(self.root_project.target_path,
PARTIAL_PARSE_FILE_NAME)
make_directory(self.root_project.target_path)
with open(path, 'wb') as fp:
pickle.dump(self.results, fp)
pickle.dump(self.manifest, fp)
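A compact, self-contained sketch of the partial-parse round trip: pickle the manifest into target/partial_parse.pickle, read it back on the next run, and reuse per-file results only when the stored checksum matches the newly read file. Plain dicts and sha256 stand in for Manifest and FileHash:

import hashlib
import os
import pickle
import tempfile

PARTIAL_PARSE_FILE_NAME = "partial_parse.pickle"

def file_hash(contents: str) -> str:
    # Stand-in for FileHash.from_contents
    return hashlib.sha256(contents.encode()).hexdigest()

def write_manifest_for_partial_parse(target_path: str, manifest: dict) -> str:
    os.makedirs(target_path, exist_ok=True)
    path = os.path.join(target_path, PARTIAL_PARSE_FILE_NAME)
    with open(path, "wb") as fp:
        pickle.dump(manifest, fp)
    return path

def read_saved_manifest(path: str) -> dict:
    with open(path, "rb") as fp:
        return pickle.load(fp)

target = tempfile.mkdtemp()
old = {"files": {"models/a.sql": file_hash("select 1")}}
path = write_manifest_for_partial_parse(target, old)

saved = read_saved_manifest(path)
# has_file-style check: an identical checksum means the old parse results
# for this file can be copied over instead of re-parsing.
print(saved["files"]["models/a.sql"] == file_hash("select 1"))  # True  -> reuse
print(saved["files"]["models/a.sql"] == file_hash("select 2"))  # False -> re-parse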
def matching_parse_results(self, result: ParseResult) -> bool:
def matching_parse_results(self, manifest: Manifest) -> bool:
"""Compare the global hashes of the read-in parse results' values to
the known ones, and return whether it is ok to re-use the results.
"""
try:
if result.dbt_version != __version__:
if manifest.metadata.dbt_version != __version__:
logger.debug(
'dbt version mismatch: {} != {}, cache invalidated'
.format(result.dbt_version, __version__)
.format(manifest.metadata.dbt_version, __version__)
)
return False
except AttributeError:
logger.debug('malformed result file, cache invalidated')
except AttributeError as exc:
logger.debug(f"malformed result file, cache invalidated: {exc}")
return False
valid = True
if self.results.vars_hash != result.vars_hash:
if not self.manifest.state_check or not manifest.state_check:
return False
if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
logger.debug('vars hash mismatch, cache invalidated')
valid = False
if self.results.profile_hash != result.profile_hash:
if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
logger.debug('profile hash mismatch, cache invalidated')
valid = False
missing_keys = {
k for k in self.results.project_hashes
if k not in result.project_hashes
k for k in self.manifest.state_check.project_hashes
if k not in manifest.state_check.project_hashes
}
if missing_keys:
logger.debug(
@@ -276,9 +378,9 @@ class ManifestLoader:
)
valid = False
for key, new_value in self.results.project_hashes.items():
if key in result.project_hashes:
old_value = result.project_hashes[key]
for key, new_value in self.manifest.state_check.project_hashes.items():
if key in manifest.state_check.project_hashes:
old_value = manifest.state_check.project_hashes[key]
if new_value != old_value:
logger.debug(
'For key {}, hash mismatch ({} -> {}), cache '
@@ -298,7 +400,7 @@ class ManifestLoader:
else:
return DEFAULT_PARTIAL_PARSE
def read_parse_results(self) -> Optional[ParseResult]:
def read_saved_manifest(self) -> Optional[Manifest]:
if not self._partial_parse_enabled():
logger.debug('Partial parsing not enabled')
return None
@@ -308,82 +410,130 @@ class ManifestLoader:
if os.path.exists(path):
try:
with open(path, 'rb') as fp:
result: ParseResult = pickle.load(fp)
manifest: Manifest = pickle.load(fp)
# keep this check inside the try/except in case something about
# the file has changed in weird ways, perhaps due to being a
# different version of dbt
if self.matching_parse_results(result):
return result
if self.matching_parse_results(manifest):
return manifest
except Exception as exc:
logger.debug(
'Failed to load parsed file from disk at {}: {}'
.format(path, exc),
exc_info=True
)
return None
def process_manifest(self, manifest: Manifest):
# This find the sources, refs, and docs and resolves them
# for nodes and exposures
def process_manifest(self):
project_name = self.root_project.project_name
process_sources(manifest, project_name)
process_refs(manifest, project_name)
process_docs(manifest, self.root_project)
process_sources(self.manifest, project_name)
process_refs(self.manifest, project_name)
process_docs(self.manifest, self.root_project)
def create_manifest(self) -> Manifest:
# before we do anything else, patch the sources. This mutates
# results.disabled, so it needs to come before the final 'disabled'
# list is created
def update_manifest(self) -> Manifest:
start_patch = time.perf_counter()
sources = patch_sources(self.results, self.root_project)
self._perf_info['patch_sources_elapsed'] = '{:.2f}'.format(
time.perf_counter() - start_patch)
# patch_sources converts the UnparsedSourceDefinitions in the
# Manifest.sources to ParsedSourceDefinition via 'patch_source'
# in SourcePatcher
sources = patch_sources(self.root_project, self.manifest)
self.manifest.sources = sources
# ParseResults had a 'disabled' attribute which was a dictionary
# which is now named '_disabled'. This used to copy from
# ParseResults to the Manifest. Can this be normalized so
# there's only one disabled?
disabled = []
for value in self.results.disabled.values():
for value in self.manifest._disabled.values():
disabled.extend(value)
nodes: MutableMapping[str, ManifestNode] = {
k: v for k, v in self.results.nodes.items()
}
manifest = Manifest(
nodes=nodes,
sources=sources,
macros=self.results.macros,
docs=self.results.docs,
exposures=self.results.exposures,
metadata=self.root_project.get_metadata(),
disabled=disabled,
files=self.results.files,
selectors=self.root_project.manifest_selectors,
self.manifest.disabled = disabled
self._perf_info.patch_sources_elapsed = (
time.perf_counter() - start_patch
)
manifest.patch_nodes(self.results.patches)
manifest.patch_macros(self.results.macro_patches)
self.manifest.selectors = self.root_project.manifest_selectors
# do the node and macro patches
self.manifest.patch_nodes()
self.manifest.patch_macros()
# process_manifest updates the refs, sources, and docs
start_process = time.perf_counter()
self.process_manifest(manifest)
self._perf_info['process_manifest_elapsed'] = '{:.2f}'.format(
time.perf_counter() - start_process)
return manifest
self.process_manifest()
@classmethod
def load_all(
cls,
root_config: RuntimeConfig,
macro_manifest: Manifest,
macro_hook: Callable[[Manifest], Any],
) -> Manifest:
with PARSING_STATE:
start_load_all = time.perf_counter()
projects = root_config.load_dependencies()
loader = cls(root_config, projects, macro_hook)
loader.load(macro_manifest=macro_manifest)
loader.write_parse_results()
manifest = loader.create_manifest()
_check_manifest(manifest, root_config)
manifest.build_flat_graph()
loader._perf_info['load_all_elapsed'] = '{:.2f}'.format(
time.perf_counter() - start_load_all)
return manifest
self._perf_info.process_manifest_elapsed = (
time.perf_counter() - start_process
)
return self.manifest
# TODO: this should be calculated per-file based on the vars() calls made in
# parsing, so changing one var doesn't invalidate everything. also there should
be something like that for env_var - currently changing env_vars in a way that
impacts graph selection or configs will result in weird test failures.
# finally, we should hash the actual profile used, not just root project +
# profiles.yml + relevant args. While sufficient, it is definitely overkill.
def build_manifest_state_check(self):
config = self.root_project
all_projects = self.all_projects
# if any of these change, we need to reject the parser
vars_hash = FileHash.from_contents(
'\x00'.join([
getattr(config.args, 'vars', '{}') or '{}',
getattr(config.args, 'profile', '') or '',
getattr(config.args, 'target', '') or '',
__version__
])
)
profile_path = os.path.join(config.args.profiles_dir, 'profiles.yml')
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
project_hashes = {}
for name, project in all_projects.items():
path = os.path.join(project.project_root, 'dbt_project.yml')
with open(path) as fp:
project_hashes[name] = FileHash.from_contents(fp.read())
state_check = ManifestStateCheck(
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
)
return state_check
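To make the invalidation rule concrete, a toy example of the vars hash built above: the null-separated string of --vars, --profile, --target, and the dbt version must hash identically across runs, or matching_parse_results rejects the saved manifest. A plain sha256 stands in for FileHash.from_contents, and the argument values are made up:

import hashlib

def vars_hash(vars_arg: str, profile: str, target: str, version: str) -> str:
    # Mirrors the '\x00'.join([...]) fed to FileHash.from_contents above.
    joined = "\x00".join([vars_arg or "{}", profile or "", target or "", version])
    return hashlib.sha256(joined.encode()).hexdigest()

previous_run = vars_hash("{}", "analytics", "dev", "0.0.0")
same_args = vars_hash("{}", "analytics", "dev", "0.0.0")
new_vars = vars_hash('{"start_date": "2021-01-01"}', "analytics", "dev", "0.0.0")

print(previous_run == same_args)  # True  -> partial parsing can proceed
print(previous_run == new_vars)   # False -> "vars hash mismatch, cache invalidated"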
def save_macros_to_adapter(self, adapter):
macro_manifest = MacroManifest(self.manifest.macros)
adapter._macro_manifest_lazy = macro_manifest
# This executes the callable macro_hook and sets the
# query headers
self.macro_hook(macro_manifest)
# This creates a MacroManifest which contains the macros in
# the adapter. Only called by the load_macros call from the
# adapter.
def create_macro_manifest(self):
for project in self.all_projects.values():
# what is the manifest passed in actually used for?
macro_parser = MacroParser(project, self.manifest)
for path in macro_parser.get_paths():
source_file = load_source_file(
path, ParseFileType.Macro, project.project_name)
block = FileBlock(source_file)
# This does not add the file to the manifest.files,
# but that shouldn't be necessary here.
self.parse_with_cache(block, macro_parser)
macro_manifest = MacroManifest(self.manifest.macros)
return macro_manifest
# This is called by the adapter code only, to create the
# MacroManifest that's stored in the adapter.
# 'get_full_manifest' uses a persistent ManifestLoader while this
# creates a temporary ManifestLoader and throws it away.
# Not sure when this would actually get used except in tests.
# The ManifestLoader loads macros with other files, then copies
# into the adapter MacroManifest.
@classmethod
def load_macros(
cls,
@@ -392,8 +542,33 @@ class ManifestLoader:
) -> Manifest:
with PARSING_STATE:
projects = root_config.load_dependencies()
# This creates a loader object, including result,
# and then throws it away, returning only the
# manifest
loader = cls(root_config, projects, macro_hook)
return loader.load_only_macros()
macro_manifest = loader.create_macro_manifest()
return macro_manifest
# Create tracking event for saving performance info
def track_project_load(self):
invocation_id = dbt.tracking.active_user.invocation_id
dbt.tracking.track_project_load({
"invocation_id": invocation_id,
"project_id": self.root_project.hashed_name(),
"path_count": self._perf_info.path_count,
"read_files_elapsed": self._perf_info.read_files_elapsed,
"load_macros_elapsed": self._perf_info.load_macros_elapsed,
"parse_project_elapsed": self._perf_info.parse_project_elapsed,
"patch_sources_elapsed": self._perf_info.patch_sources_elapsed,
"process_manifest_elapsed": (
self._perf_info.process_manifest_elapsed
),
"load_all_elapsed": self._perf_info.load_all_elapsed,
"is_partial_parse_enabled": (
self._perf_info.is_partial_parse_enabled
),
})
def invalid_ref_fail_unless_test(node, target_model_name,
@@ -490,6 +665,7 @@ def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
_warn_for_unused_resource_config_paths(manifest, config)
# This is just used in test cases
def _load_projects(config, paths):
for path in paths:
try:
@@ -523,6 +699,7 @@ DocsContextCallback = Callable[
]
# node and column descriptions
def _process_docs_for_node(
context: Dict[str, Any],
node: ManifestNode,
@@ -532,6 +709,7 @@ def _process_docs_for_node(
column.description = get_rendered(column.description, context)
# source and table descriptions, column descriptions
def _process_docs_for_source(
context: Dict[str, Any],
source: ParsedSourceDefinition,
@@ -549,6 +727,7 @@ def _process_docs_for_source(
column.description = column_desc
# macro argument descriptions
def _process_docs_for_macro(
context: Dict[str, Any], macro: ParsedMacro
) -> None:
@@ -557,6 +736,17 @@ def _process_docs_for_macro(
arg.description = get_rendered(arg.description, context)
# exposure descriptions
def _process_docs_for_exposure(
context: Dict[str, Any], exposure: ParsedExposure
) -> None:
exposure.description = get_rendered(exposure.description, context)
# nodes: node and column descriptions
# sources: source and table descriptions, column descriptions
# macros: macro argument descriptions
# exposures: exposure descriptions
def process_docs(manifest: Manifest, config: RuntimeConfig):
for node in manifest.nodes.values():
ctx = generate_runtime_docs(
@@ -582,6 +772,14 @@ def process_docs(manifest: Manifest, config: RuntimeConfig):
config.project_name,
)
_process_docs_for_macro(ctx, macro)
for exposure in manifest.exposures.values():
ctx = generate_runtime_docs(
config,
exposure,
manifest,
config.project_name,
)
_process_docs_for_exposure(ctx, exposure)
def _process_refs_for_exposure(
@@ -667,9 +865,12 @@ def _process_refs_for_node(
# TODO: I think this is extraneous, node should already be the same
# as manifest.nodes[node.unique_id] (we're mutating node here, not
# making a new one)
# Q: could we stop doing this?
manifest.update_node(node)
# Takes references in 'refs' array of nodes and exposures, finds the target
# node, and updates 'depends_on.nodes' with the unique id
def process_refs(manifest: Manifest, current_project: str):
for node in manifest.nodes.values():
_process_refs_for_node(manifest, current_project, node)
@@ -729,6 +930,9 @@ def _process_sources_for_node(
manifest.update_node(node)
# Loops through all nodes and exposures, for each element in
# 'sources' array finds the source node and updates the
# 'depends_on.nodes' array with the unique id
def process_sources(manifest: Manifest, current_project: str):
for node in manifest.nodes.values():
if node.resource_type == NodeType.Source:
@@ -740,6 +944,8 @@ def process_sources(manifest: Manifest, current_project: str):
return manifest
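A toy illustration of what process_refs and process_sources do to a node: each entry in refs or sources is looked up in the manifest and its unique id is appended to depends_on.nodes. The unique ids and names below are made up:

manifest_nodes = {
    "model.my_project.customers": {"name": "customers"},
}
manifest_sources = {
    "source.my_project.raw.orders": {"source_name": "raw", "name": "orders"},
}

node = {
    "refs": [["customers"]],
    "sources": [["raw", "orders"]],
    "depends_on": {"nodes": []},
}

# process_refs: resolve each ref to a node unique id
for ref in node["refs"]:
    target_name = ref[-1]
    for unique_id, target in manifest_nodes.items():
        if target["name"] == target_name:
            node["depends_on"]["nodes"].append(unique_id)

# process_sources: resolve each [source_name, table_name] pair
for source_name, table_name in node["sources"]:
    for unique_id, src in manifest_sources.items():
        if (src["source_name"], src["name"]) == (source_name, table_name):
            node["depends_on"]["nodes"].append(unique_id)

print(node["depends_on"]["nodes"])
# ['model.my_project.customers', 'source.my_project.raw.orders']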
# This is called in task.rpc.sql_commands when a "dynamic" node is
# created in the manifest, in 'add_refs'
def process_macro(
config: RuntimeConfig, manifest: Manifest, macro: ParsedMacro
) -> None:
@@ -752,6 +958,8 @@ def process_macro(
_process_docs_for_macro(ctx, macro)
# This is called in task.rpc.sql_commands when a "dynamic" node is
# created in the manifest, in 'add_refs'
def process_node(
config: RuntimeConfig, manifest: Manifest, node: ManifestNode
):
@@ -762,18 +970,3 @@ def process_node(
_process_refs_for_node(manifest, config.project_name, node)
ctx = generate_runtime_docs(config, node, manifest, config.project_name)
_process_docs_for_node(ctx, node)
def load_macro_manifest(
config: RuntimeConfig,
macro_hook: Callable[[Manifest], Any],
) -> Manifest:
return ManifestLoader.load_macros(config, macro_hook)
def load_manifest(
config: RuntimeConfig,
macro_manifest: Manifest,
macro_hook: Callable[[Manifest], Any],
) -> Manifest:
return ManifestLoader.load_all(config, macro_manifest, macro_hook)

View File

@@ -1,17 +1,14 @@
from dbt.contracts.graph.parsed import ParsedModelNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FilesystemSearcher, FileBlock
from dbt.parser.search import FileBlock
class ModelParser(SimpleSQLParser[ParsedModelNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.source_paths, '.sql'
)
def parse_from_dict(self, dct, validate=True) -> ParsedModelNode:
return ParsedModelNode.from_dict(dct, validate=validate)
if validate:
ParsedModelNode.validate(dct)
return ParsedModelNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:

View File

@@ -0,0 +1,111 @@
from dbt.clients.system import load_file_contents
from dbt.contracts.files import FilePath, ParseFileType, SourceFile, FileHash
from dbt.parser.search import FilesystemSearcher
# This loads the files contents and creates the SourceFile object
def load_source_file(
path: FilePath, parse_file_type: ParseFileType,
project_name: str) -> SourceFile:
file_contents = load_file_contents(path.absolute_path, strip=False)
checksum = FileHash.from_contents(file_contents)
source_file = SourceFile(path=path, checksum=checksum,
parse_file_type=parse_file_type, project_name=project_name)
source_file.contents = file_contents.strip()
return source_file
# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
if match.seed_too_large():
# We don't want to calculate a hash of this file. Use the path.
source_file = SourceFile.big_seed(match)
else:
file_contents = load_file_contents(match.absolute_path, strip=False)
checksum = FileHash.from_contents(file_contents)
source_file = SourceFile(path=match, checksum=checksum)
source_file.contents = ''
source_file.parse_file_type = ParseFileType.Seed
source_file.project_name = project_name
return source_file
# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
# them into a bunch of FileSource objects
def get_source_files(project, paths, extension, parse_file_type):
# file path list
fp_list = list(FilesystemSearcher(
project, paths, extension
))
# file block list
fb_list = []
for fp in fp_list:
if parse_file_type == ParseFileType.Seed:
fb_list.append(load_seed_source_file(fp, project.project_name))
else:
fb_list.append(load_source_file(
fp, parse_file_type, project.project_name))
return fb_list
def read_files_for_parser(project, files, dirs, extension, parse_ft):
parser_files = []
source_files = get_source_files(
project, dirs, extension, parse_ft
)
for sf in source_files:
files[sf.search_key] = sf
parser_files.append(sf.search_key)
return parser_files
# This needs to read files for multiple projects, so the 'files'
# dictionary needs to be passed in. What determines the order of
# the various projects? Is the root project always last? Do the
# non-root projects need to be done separately in order?
def read_files(project, files, parser_files):
project_files = {}
project_files['MacroParser'] = read_files_for_parser(
project, files, project.macro_paths, '.sql', ParseFileType.Macro,
)
project_files['ModelParser'] = read_files_for_parser(
project, files, project.source_paths, '.sql', ParseFileType.Model,
)
project_files['SnapshotParser'] = read_files_for_parser(
project, files, project.snapshot_paths, '.sql', ParseFileType.Snapshot,
)
project_files['AnalysisParser'] = read_files_for_parser(
project, files, project.analysis_paths, '.sql', ParseFileType.Analysis,
)
project_files['DataTestParser'] = read_files_for_parser(
project, files, project.test_paths, '.sql', ParseFileType.Test,
)
project_files['SeedParser'] = read_files_for_parser(
project, files, project.data_paths, '.csv', ParseFileType.Seed,
)
project_files['DocumentationParser'] = read_files_for_parser(
project, files, project.docs_paths, '.md', ParseFileType.Documentation,
)
project_files['SchemaParser'] = read_files_for_parser(
project, files, project.all_source_paths, '.yml', ParseFileType.Schema,
)
# Also read .yaml files for schema files. Might be better to change
# 'read_files_for_parser' to accept an array in the future.
yaml_files = read_files_for_parser(
project, files, project.all_source_paths, '.yaml', ParseFileType.Schema,
)
project_files['SchemaParser'].extend(yaml_files)
# Store the parser files for this particular project
parser_files[project.project_name] = project_files

View File
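For reference, the shape of the structure read_files builds: a dict keyed by project name whose values map parser class names to lists of file keys. Every key below is illustrative (the real keys are SourceFile.search_key values), not taken from an actual run:

# Hypothetical output of read_files() for a single project.
project_parser_files = {
    "my_project": {
        "MacroParser": ["macros/cents_to_dollars.sql"],
        "ModelParser": ["models/customers.sql", "models/orders.sql"],
        "SnapshotParser": [],
        "AnalysisParser": [],
        "DataTestParser": ["tests/assert_positive_total.sql"],
        "SeedParser": ["data/country_codes.csv"],
        "DocumentationParser": ["models/docs.md"],
        "SchemaParser": ["models/schema.yml"],  # .yaml files are appended here too
    }
}

# The loader then walks parser_files[parser_name], pulls the SourceFile out of
# manifest.files[search_key], wraps it in a FileBlock, and calls parse_with_cache.
for parser_name, keys in project_parser_files["my_project"].items():
    print(parser_name, len(keys))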

@@ -1,328 +0,0 @@
from dataclasses import dataclass, field
from typing import TypeVar, MutableMapping, Mapping, Union, List
from hologram import JsonSchemaMixin
from dbt.contracts.files import RemoteFile, FileHash, SourceFile
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.graph.parsed import (
HasUniqueID,
ParsedAnalysisNode,
ParsedDataTestNode,
ParsedDocumentation,
ParsedHookNode,
ParsedMacro,
ParsedMacroPatch,
ParsedModelNode,
ParsedNodePatch,
ParsedExposure,
ParsedRPCNode,
ParsedSeedNode,
ParsedSchemaTestNode,
ParsedSnapshotNode,
UnpatchedSourceDefinition,
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.util import Writable, Replaceable, MacroKey, SourceKey
from dbt.exceptions import (
raise_duplicate_resource_name, raise_duplicate_patch_name,
raise_duplicate_macro_patch_name, CompilationException, InternalException,
raise_compiler_error, raise_duplicate_source_patch_name
)
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt.version import __version__
# Parsers can return anything as long as it's a unique ID
ParsedValueType = TypeVar('ParsedValueType', bound=HasUniqueID)
def _check_duplicates(
value: HasUniqueID, src: Mapping[str, HasUniqueID]
):
if value.unique_id in src:
raise_duplicate_resource_name(value, src[value.unique_id])
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedDataTestNode,
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSchemaTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
]
def dict_field():
return field(default_factory=dict)
@dataclass
class ParseResult(JsonSchemaMixin, Writable, Replaceable):
vars_hash: FileHash
profile_hash: FileHash
project_hashes: MutableMapping[str, FileHash]
nodes: MutableMapping[str, ManifestNodes] = dict_field()
sources: MutableMapping[str, UnpatchedSourceDefinition] = dict_field()
docs: MutableMapping[str, ParsedDocumentation] = dict_field()
macros: MutableMapping[str, ParsedMacro] = dict_field()
exposures: MutableMapping[str, ParsedExposure] = dict_field()
macro_patches: MutableMapping[MacroKey, ParsedMacroPatch] = dict_field()
patches: MutableMapping[str, ParsedNodePatch] = dict_field()
source_patches: MutableMapping[SourceKey, SourcePatch] = dict_field()
files: MutableMapping[str, SourceFile] = dict_field()
disabled: MutableMapping[str, List[CompileResultNode]] = dict_field()
dbt_version: str = __version__
def get_file(self, source_file: SourceFile) -> SourceFile:
key = source_file.search_key
if key is None:
return source_file
if key not in self.files:
self.files[key] = source_file
return self.files[key]
def add_source(
self, source_file: SourceFile, source: UnpatchedSourceDefinition
):
# sources can't be overwritten!
_check_duplicates(source, self.sources)
self.sources[source.unique_id] = source
self.get_file(source_file).sources.append(source.unique_id)
def add_node_nofile(self, node: ManifestNodes):
# nodes can't be overwritten!
_check_duplicates(node, self.nodes)
self.nodes[node.unique_id] = node
def add_node(self, source_file: SourceFile, node: ManifestNodes):
self.add_node_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
def add_exposure(self, source_file: SourceFile, exposure: ParsedExposure):
_check_duplicates(exposure, self.exposures)
self.exposures[exposure.unique_id] = exposure
self.get_file(source_file).exposures.append(exposure.unique_id)
def add_disabled_nofile(self, node: CompileResultNode):
if node.unique_id in self.disabled:
self.disabled[node.unique_id].append(node)
else:
self.disabled[node.unique_id] = [node]
def add_disabled(self, source_file: SourceFile, node: CompileResultNode):
self.add_disabled_nofile(node)
self.get_file(source_file).nodes.append(node.unique_id)
def add_macro(self, source_file: SourceFile, macro: ParsedMacro):
if macro.unique_id in self.macros:
# detect that the macro exists and emit an error
other_path = self.macros[macro.unique_id].original_file_path
# subtract 2 for the "Compilation Error" indent
# note that the line wrap eats newlines, so if you want newlines,
# this is the result :(
msg = line_wrap_message(
f'''\
dbt found two macros named "{macro.name}" in the project
"{macro.package_name}".
To fix this error, rename or remove one of the following
macros:
- {macro.original_file_path}
- {other_path}
''',
subtract=2
)
raise_compiler_error(msg)
self.macros[macro.unique_id] = macro
self.get_file(source_file).macros.append(macro.unique_id)
def add_doc(self, source_file: SourceFile, doc: ParsedDocumentation):
_check_duplicates(doc, self.docs)
self.docs[doc.unique_id] = doc
self.get_file(source_file).docs.append(doc.unique_id)
def add_patch(
self, source_file: SourceFile, patch: ParsedNodePatch
) -> None:
# patches can't be overwritten
if patch.name in self.patches:
raise_duplicate_patch_name(patch, self.patches[patch.name])
self.patches[patch.name] = patch
self.get_file(source_file).patches.append(patch.name)
def add_macro_patch(
self, source_file: SourceFile, patch: ParsedMacroPatch
) -> None:
# macros are fully namespaced
key = (patch.package_name, patch.name)
if key in self.macro_patches:
raise_duplicate_macro_patch_name(patch, self.macro_patches[key])
self.macro_patches[key] = patch
self.get_file(source_file).macro_patches.append(key)
def add_source_patch(
self, source_file: SourceFile, patch: SourcePatch
) -> None:
# source patches must be unique
key = (patch.overrides, patch.name)
if key in self.source_patches:
raise_duplicate_source_patch_name(patch, self.source_patches[key])
self.source_patches[key] = patch
self.get_file(source_file).source_patches.append(key)
def _get_disabled(
self,
unique_id: str,
match_file: SourceFile,
) -> List[CompileResultNode]:
if unique_id not in self.disabled:
raise InternalException(
'called _get_disabled with id={}, but it does not exist'
.format(unique_id)
)
return [
n for n in self.disabled[unique_id]
if n.original_file_path == match_file.path.original_file_path
]
def _process_node(
self,
node_id: str,
source_file: SourceFile,
old_file: SourceFile,
old_result: 'ParseResult',
) -> None:
"""Nodes are a special kind of complicated - there can be multiple
with the same name, as long as all but one are disabled.
Only handle nodes where the matching node has the same resource type
as the current parser.
"""
source_path = source_file.path.original_file_path
found: bool = False
if node_id in old_result.nodes:
old_node = old_result.nodes[node_id]
if old_node.original_file_path == source_path:
self.add_node(source_file, old_node)
found = True
if node_id in old_result.disabled:
matches = old_result._get_disabled(node_id, source_file)
for match in matches:
self.add_disabled(source_file, match)
found = True
if not found:
raise CompilationException(
'Expected to find "{}" in cached "manifest.nodes" or '
'"manifest.disabled" based on cached file information: {}!'
.format(node_id, old_file)
)
def sanitized_update(
self,
source_file: SourceFile,
old_result: 'ParseResult',
resource_type: NodeType,
) -> bool:
"""Perform a santized update. If the file can't be updated, invalidate
it and return false.
"""
if isinstance(source_file.path, RemoteFile):
return False
old_file = old_result.get_file(source_file)
for doc_id in old_file.docs:
doc = _expect_value(doc_id, old_result.docs, old_file, "docs")
self.add_doc(source_file, doc)
for macro_id in old_file.macros:
macro = _expect_value(
macro_id, old_result.macros, old_file, "macros"
)
self.add_macro(source_file, macro)
for source_id in old_file.sources:
source = _expect_value(
source_id, old_result.sources, old_file, "sources"
)
self.add_source(source_file, source)
# because we know this is how we _parsed_ the node, we can safely
# assume if it's disabled it was done by the project or file, and
# we can keep our old data
# the node ID could be in old_result.disabled AND in old_result.nodes.
# In that case, we have to make sure the path also matches.
for node_id in old_file.nodes:
# cheat: look at the first part of the node ID and compare it to
# the parser resource type. On a mismatch, bail out.
if resource_type != node_id.split('.')[0]:
continue
self._process_node(node_id, source_file, old_file, old_result)
for exposure_id in old_file.exposures:
exposure = _expect_value(
exposure_id, old_result.exposures, old_file, "exposures"
)
self.add_exposure(source_file, exposure)
patched = False
for name in old_file.patches:
patch = _expect_value(
name, old_result.patches, old_file, "patches"
)
self.add_patch(source_file, patch)
patched = True
if patched:
self.get_file(source_file).patches.sort()
macro_patched = False
for key in old_file.macro_patches:
macro_patch = _expect_value(
key, old_result.macro_patches, old_file, "macro_patches"
)
self.add_macro_patch(source_file, macro_patch)
macro_patched = True
if macro_patched:
self.get_file(source_file).macro_patches.sort()
return True
def has_file(self, source_file: SourceFile) -> bool:
key = source_file.search_key
if key is None:
return False
if key not in self.files:
return False
my_checksum = self.files[key].checksum
return my_checksum == source_file.checksum
@classmethod
def rpc(cls):
# ugh!
return cls(FileHash.empty(), FileHash.empty(), {})
K_T = TypeVar('K_T')
V_T = TypeVar('V_T')
def _expect_value(
key: K_T, src: Mapping[K_T, V_T], old_file: SourceFile, name: str
) -> V_T:
if key not in src:
raise CompilationException(
'Expected to find "{}" in cached "result.{}" based '
'on cached file information: {}!'
.format(key, name, old_file)
)
return src[key]

View File

@@ -22,11 +22,10 @@ class RPCBlock(FileBlock):
class RPCCallParser(SimpleSQLParser[ParsedRPCNode]):
def get_paths(self):
return []
def parse_from_dict(self, dct, validate=True) -> ParsedRPCNode:
return ParsedRPCNode.from_dict(dct, validate=validate)
if validate:
ParsedRPCNode.validate(dct)
return ParsedRPCNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:

View File

@@ -179,12 +179,13 @@ class TestBuilder(Generic[Testable]):
- or it may not be namespaced (test)
"""
# The 'test_name' is used to find the 'macro' that implements the test
TEST_NAME_PATTERN = re.compile(
r'((?P<test_namespace>([a-zA-Z_][0-9a-zA-Z_]*))\.)?'
r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
)
# map magic keys to default values
MODIFIER_ARGS = {'severity': 'ERROR', 'tags': []}
# kwargs representing test configs
MODIFIER_ARGS = ('severity', 'tags', 'enabled')
def __init__(
self,
@@ -216,11 +217,12 @@ class TestBuilder(Generic[Testable]):
self.name: str = groups['test_name']
self.namespace: str = groups['test_namespace']
self.modifiers: Dict[str, Any] = {}
for key, default in self.MODIFIER_ARGS.items():
value = self.args.pop(key, default)
for key in self.MODIFIER_ARGS:
value = self.args.pop(key, None)
if isinstance(value, str):
value = get_rendered(value, render_ctx)
self.modifiers[key] = value
value = get_rendered(value, render_ctx, native=True)
if value is not None:
self.modifiers[key] = value
if self.namespace is not None:
self.package_name = self.namespace
@@ -266,8 +268,15 @@ class TestBuilder(Generic[Testable]):
test_args['column_name'] = name
return test_name, test_args
def severity(self) -> str:
return self.modifiers.get('severity', 'ERROR').upper()
def enabled(self) -> Optional[bool]:
return self.modifiers.get('enabled')
def severity(self) -> Optional[str]:
sev = self.modifiers.get('severity')
if sev:
return sev.upper()
else:
return None
def tags(self) -> List[str]:
tags = self.modifiers.get('tags', [])
@@ -302,13 +311,25 @@ class TestBuilder(Generic[Testable]):
name = '{}_{}'.format(self.namespace, name)
return get_nice_schema_test_name(name, self.target.name, self.args)
def construct_config(self) -> str:
configs = ",".join([
f"{key}=" + (f"'{value}'" if isinstance(value, str) else str(value))
for key, value
in self.modifiers.items()
])
if configs:
return f"{{{{ config({configs}) }}}}"
else:
return ""
# this is the 'raw_sql' that's used in 'render_update' and execution
# of the test macro
def build_raw_sql(self) -> str:
return (
"{{{{ config(severity='{severity}') }}}}"
"{{{{ {macro}(**{kwargs_name}) }}}}"
"{config}{{{{ {macro}(**{kwargs_name}) }}}}"
).format(
macro=self.macro_name(),
severity=self.severity(),
config=self.construct_config(),
kwargs_name=SCHEMA_TEST_KWARGS_NAME,
)

View File
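A worked example of the new config prefix: construct_config renders a {{ config(...) }} call from the collected modifiers, and build_raw_sql prepends it to the test macro invocation. The modifier values and the kwargs variable name are illustrative:

def construct_config(modifiers: dict) -> str:
    # Same string-building logic as TestBuilder.construct_config above.
    configs = ",".join(
        f"{key}=" + (f"'{value}'" if isinstance(value, str) else str(value))
        for key, value in modifiers.items()
    )
    return f"{{{{ config({configs}) }}}}" if configs else ""

def build_raw_sql(macro_name: str, modifiers: dict, kwargs_name: str) -> str:
    return "{config}{{{{ {macro}(**{kwargs_name}) }}}}".format(
        config=construct_config(modifiers),
        macro=macro_name,
        kwargs_name=kwargs_name,
    )

print(build_raw_sql("test_unique", {"severity": "WARN", "tags": ["finance"]}, "_dbt_schema_test_kwargs"))
# {{ config(severity='WARN',tags=['finance']) }}{{ test_unique(**_dbt_schema_test_kwargs) }}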

@@ -6,9 +6,9 @@ from typing import (
Iterable, Dict, Any, Union, List, Optional, Generic, TypeVar, Type
)
from hologram import ValidationError, JsonSchemaMixin
from dbt.dataclass_schema import ValidationError, dbtClassMixin
from dbt.adapters.factory import get_adapter
from dbt.adapters.factory import get_adapter, get_adapter_package_names
from dbt.clients.jinja import get_rendered, add_rendered_test_kwargs
from dbt.clients.yaml_helper import load_yaml_text
from dbt.config.renderer import SchemaYamlRenderer
@@ -20,7 +20,10 @@ from dbt.context.context_config import (
)
from dbt.context.configured import generate_schema_yml
from dbt.context.target import generate_target_context
from dbt.context.providers import generate_parse_exposure
from dbt.context.providers import (
generate_parse_exposure, generate_test_context
)
from dbt.context.macro_resolver import MacroResolver
from dbt.contracts.files import FileHash
from dbt.contracts.graph.manifest import SourceFile
from dbt.contracts.graph.model_config import SourceConfig
@@ -49,11 +52,11 @@ from dbt.contracts.graph.unparsed import (
from dbt.exceptions import (
validator_error_message, JSONValidationException,
raise_invalid_schema_yml_version, ValidationException,
CompilationException, warn_or_error, InternalException
CompilationException, InternalException
)
from dbt.node_types import NodeType
from dbt.parser.base import SimpleParser
from dbt.parser.search import FileBlock, FilesystemSearcher
from dbt.parser.search import FileBlock
from dbt.parser.schema_test_builders import (
TestBuilder, SchemaTestBlock, TargetBlock, YamlBlock,
TestBlock, Testable
@@ -95,6 +98,7 @@ def error_context(
class ParserRef:
"""A helper object to hold parse-time references."""
def __init__(self):
self.column_info: Dict[str, ColumnInfo] = {}
@@ -154,9 +158,9 @@ def merge_freshness(
class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
def __init__(
self, results, project, root_project, macro_manifest,
self, project, manifest, root_project,
) -> None:
super().__init__(results, project, root_project, macro_manifest)
super().__init__(project, manifest, root_project)
all_v_2 = (
self.root_project.config_version == 2 and
self.project.config_version == 2
@@ -172,6 +176,15 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
self.raw_renderer = SchemaYamlRenderer(ctx)
internal_package_names = get_adapter_package_names(
self.root_project.credentials.type
)
self.macro_resolver = MacroResolver(
self.manifest.macros,
self.root_project.project_name,
internal_package_names
)
@classmethod
def get_compiled_path(cls, block: FileBlock) -> str:
# should this raise an error?
@@ -181,29 +194,12 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
def resource_type(self) -> NodeType:
return NodeType.Test
def get_paths(self):
# TODO: In order to support this, make FilesystemSearcher accept a list
# of file patterns. eg: ['.yml', '.yaml']
yaml_files = list(FilesystemSearcher(
self.project, self.project.all_source_paths, '.yaml'
))
if yaml_files:
warn_or_error(
'A future version of dbt will parse files with both'
' .yml and .yaml file extensions. dbt found'
f' {len(yaml_files)} files with .yaml extensions in'
' your dbt project. To avoid errors when upgrading'
' to a future release, either remove these files from'
' your dbt project, or change their extensions.'
)
return FilesystemSearcher(
self.project, self.project.all_source_paths, '.yml'
)
def parse_from_dict(self, dct, validate=True) -> ParsedSchemaTestNode:
return ParsedSchemaTestNode.from_dict(dct, validate=validate)
if validate:
ParsedSchemaTestNode.validate(dct)
return ParsedSchemaTestNode.from_dict(dct)
def _parse_format_version(
def _check_format_version(
self, yaml: YamlBlock
) -> None:
path = yaml.path.relative_path
@@ -269,6 +265,8 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
relation_cls = adapter.Relation
return str(relation_cls.create_from(self.root_project, node))
# This converts an UnpatchedSourceDefinition to a ParsedSourceDefinition
# it is used by the SourcePatcher.
def parse_source(
self, target: UnpatchedSourceDefinition
) -> ParsedSourceDefinition:
@@ -370,10 +368,11 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
'config': self.config_dict(config),
'test_metadata': test_metadata,
'column_name': column_name,
'checksum': FileHash.empty().to_dict(),
'checksum': FileHash.empty().to_dict(omit_none=True),
}
try:
return self.parse_from_dict(dct)
ParsedSchemaTestNode.validate(dct)
return ParsedSchemaTestNode.from_dict(dct)
except ValidationError as exc:
msg = validator_error_message(exc)
# this is a bit silly, but build an UnparsedNode just for error
@@ -386,6 +385,7 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
)
raise CompilationException(msg, node=node) from exc
# lots of time spent in this method
def _parse_generic_test(
self,
target: Testable,
@@ -424,6 +424,7 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
# is not necessarily this package's name
fqn = self.get_fqn(fqn_path, builder.fqn_name)
# this is the config that is used in render_update
config = self.initial_config(fqn)
metadata = {
@@ -446,9 +447,56 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
column_name=column_name,
test_metadata=metadata,
)
self.render_update(node, config)
self.render_test_update(node, config, builder)
return node
# This does special shortcut processing for the two
# most common internal macros, not_null and unique,
# avoiding the Jinja rendering otherwise needed to resolve
# config, variables, etc. that might appear in the macro.
# In the future we may generalize this to handle additional
# macros, or use static parsing to avoid the Jinja overhead.
def render_test_update(self, node, config, builder):
macro_unique_id = self.macro_resolver.get_macro_id(
node.package_name, 'test_' + builder.name)
# Add the depends_on here so we can limit the macros added
# to the context in rendering processing
node.depends_on.add_macro(macro_unique_id)
if (macro_unique_id in
['macro.dbt.test_not_null', 'macro.dbt.test_unique']):
self.update_parsed_node(node, config)
if builder.severity() is not None:
node.unrendered_config['severity'] = builder.severity()
node.config['severity'] = builder.severity()
if builder.enabled() is not None:
node.config['enabled'] = builder.enabled()
# source node tests are processed at patch_source time
if isinstance(builder.target, UnpatchedSourceDefinition):
sources = [builder.target.fqn[-2], builder.target.fqn[-1]]
node.sources.append(sources)
else: # all other nodes
node.refs.append([builder.target.name])
else:
try:
# make a base context that doesn't have the magic kwargs field
context = generate_test_context(
node, self.root_project, self.manifest, config,
self.macro_resolver,
)
# update with rendered test kwargs (which collects any refs)
add_rendered_test_kwargs(context, node, capture_macros=True)
# the parsed node is not rendered in the native context.
get_rendered(
node.raw_sql, context, node, capture_macros=True
)
self.update_parsed_node(node, config)
except ValidationError as exc:
# we got a ValidationError - probably bad types in config()
msg = validator_error_message(exc)
raise CompilationException(msg, node=node) from exc
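A condensed sketch of the branching described above (illustrative only; the real method takes the node, config, and builder):

FAST_PATH_MACROS = {"macro.dbt.test_not_null", "macro.dbt.test_unique"}

def can_skip_jinja_render(macro_unique_id: str) -> bool:
    # Built-in not_null/unique tests have nothing extra to resolve via Jinja,
    # so their config, refs/sources, and depends_on can be filled in directly;
    # every other test macro still gets rendered in the test context.
    return macro_unique_id in FAST_PATH_MACROS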
def parse_source_test(
self,
target: UnpatchedSourceDefinition,
@@ -480,9 +528,9 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
)
# we can't go through result.add_node - no file... instead!
if node.config.enabled:
self.results.add_node_nofile(node)
self.manifest.add_node_nofile(node)
else:
self.results.add_disabled_nofile(node)
self.manifest.add_disabled_nofile(node)
return node
def parse_node(self, block: SchemaTestBlock) -> ParsedSchemaTestNode:
@@ -556,14 +604,17 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
def parse_exposures(self, block: YamlBlock) -> None:
parser = ExposureParser(self, block)
for node in parser.parse():
self.results.add_exposure(block.file, node)
self.manifest.add_exposure(block.file, node)
def parse_file(self, block: FileBlock) -> None:
dct = self._yaml_from_file(block.file)
# mark the file as seen, even if there are no macros in it
self.results.get_file(block.file)
# mark the file as seen, in Manifest.files
self.manifest.get_file(block.file)
if dct:
try:
# This does a deep_map to check for circular references
dct = self.raw_renderer.render_data(dct)
except CompilationException as exc:
raise CompilationException(
@@ -571,24 +622,58 @@ class SchemaParser(SimpleParser[SchemaTestBlock, ParsedSchemaTestNode]):
f'project {self.project.project_name}: {exc}'
) from exc
# contains the FileBlock and the data (dictionary)
yaml_block = YamlBlock.from_file_block(block, dct)
self._parse_format_version(yaml_block)
# checks version
self._check_format_version(yaml_block)
parser: YamlDocsReader
for key in NodeType.documentable():
plural = key.pluralize()
if key == NodeType.Source:
parser = SourceParser(self, yaml_block, plural)
elif key == NodeType.Macro:
parser = MacroPatchParser(self, yaml_block, plural)
elif key == NodeType.Analysis:
parser = AnalysisPatchParser(self, yaml_block, plural)
else:
parser = TestablePatchParser(self, yaml_block, plural)
# There are 7 kinds of parsers:
# Model, Seed, Snapshot, Source, Macro, Analysis, Exposures
# NonSourceParser.parse(); TestablePatchParser is a subclass of
# NodePatchParser
if 'models' in dct:
parser = TestablePatchParser(self, yaml_block, 'models')
for test_block in parser.parse():
self.parse_tests(test_block)
self.parse_exposures(yaml_block)
# NonSourceParser.parse()
if 'seeds' in dct:
parser = TestablePatchParser(self, yaml_block, 'seeds')
for test_block in parser.parse():
self.parse_tests(test_block)
# NonSourceParser.parse()
if 'snapshots' in dct:
parser = TestablePatchParser(self, yaml_block, 'snapshots')
for test_block in parser.parse():
self.parse_tests(test_block)
# This parser uses SourceParser.parse() which doesn't return
# any test blocks. Source tests are handled at a later point
# in the process.
if 'sources' in dct:
parser = SourceParser(self, yaml_block, 'sources')
parser.parse()
# NonSourceParser.parse()
if 'macros' in dct:
parser = MacroPatchParser(self, yaml_block, 'macros')
for test_block in parser.parse():
self.parse_tests(test_block)
# NonSourceParser.parse()
if 'analyses' in dct:
parser = AnalysisPatchParser(self, yaml_block, 'analyses')
for test_block in parser.parse():
self.parse_tests(test_block)
# parse exposures
if 'exposures' in dct:
self.parse_exposures(yaml_block)
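Summarized, the explicit if-blocks above dispatch the top-level schema.yml keys as follows (an illustrative table, not dbt code):

KEY_TO_PARSER = {
    "models": "TestablePatchParser",
    "seeds": "TestablePatchParser",
    "snapshots": "TestablePatchParser",
    "sources": "SourceParser",        # source tests are handled later by the SourcePatcher
    "macros": "MacroPatchParser",
    "analyses": "AnalysisPatchParser",
    "exposures": "ExposureParser",    # reached via parse_exposures()
}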
Parsed = TypeVar(
@@ -605,17 +690,20 @@ NonSourceTarget = TypeVar(
)
# abstract base class (ABCMeta)
class YamlReader(metaclass=ABCMeta):
def __init__(
self, schema_parser: SchemaParser, yaml: YamlBlock, key: str
) -> None:
self.schema_parser = schema_parser
# key: models, seeds, snapshots, sources, macros,
# analyses, exposures
self.key = key
self.yaml = yaml
@property
def results(self):
return self.schema_parser.results
def manifest(self):
return self.schema_parser.manifest
@property
def project(self):
@@ -629,6 +717,9 @@ class YamlReader(metaclass=ABCMeta):
def root_project(self):
return self.schema_parser.root_project
# for the different schema subparsers ('models', 'source', etc)
# get the list of dicts pointed to by the key in the yaml config,
# ensure that the dicts have string keys
def get_key_dicts(self) -> Iterable[Dict[str, Any]]:
data = self.yaml.data.get(self.key, [])
if not isinstance(data, list):
@@ -638,7 +729,10 @@ class YamlReader(metaclass=ABCMeta):
)
path = self.yaml.path.original_file_path
# for each dict in the data (which is a list of dicts)
for entry in data:
# check that entry is a dict and that all dict keys
# are strings
if coerce_dict_str(entry) is not None:
yield entry
else:
@@ -654,19 +748,27 @@ class YamlDocsReader(YamlReader):
raise NotImplementedError('parse is abstract')
T = TypeVar('T', bound=JsonSchemaMixin)
T = TypeVar('T', bound=dbtClassMixin)
# This parses the 'sources' keys in yaml files.
class SourceParser(YamlDocsReader):
def _target_from_dict(self, cls: Type[T], data: Dict[str, Any]) -> T:
path = self.yaml.path.original_file_path
try:
cls.validate(data)
return cls.from_dict(data)
except (ValidationError, JSONValidationException) as exc:
msg = error_context(path, self.key, data, exc)
raise CompilationException(msg) from exc
# The other parse method returns TestBlocks. This one doesn't.
# This takes the yaml dictionaries in 'sources' keys and uses them
# to create UnparsedSourceDefinition objects. They are then turned
# into UnpatchedSourceDefinition objects in 'add_source_definitions'
# or SourcePatch objects in 'add_source_patch'
def parse(self) -> List[TestBlock]:
# get a verified list of dicts for the key handled by this parser
for data in self.get_key_dicts():
data = self.project.credentials.translate_aliases(
data, recurse=True
@@ -676,7 +778,7 @@ class SourceParser(YamlDocsReader):
if is_override:
data['path'] = self.yaml.path.original_file_path
patch = self._target_from_dict(SourcePatch, data)
self.results.add_source_patch(self.yaml.file, patch)
self.manifest.add_source_patch(self.yaml.file, patch)
else:
source = self._target_from_dict(UnparsedSourceDefinition, data)
self.add_source_definitions(source)
@@ -706,13 +808,15 @@ class SourceParser(YamlDocsReader):
resource_type=NodeType.Source,
fqn=fqn,
)
self.results.add_source(self.yaml.file, result)
self.manifest.add_source(self.yaml.file, result)
# This class has three main subclasses: TestablePatchParser (models,
# seeds, snapshots), MacroPatchParser, and AnalysisPatchParser
class NonSourceParser(YamlDocsReader, Generic[NonSourceTarget, Parsed]):
@abstractmethod
def _target_type(self) -> Type[NonSourceTarget]:
raise NotImplementedError('_unsafe_from_dict not implemented')
raise NotImplementedError('_target_type not implemented')
@abstractmethod
def get_block(self, node: NonSourceTarget) -> TargetBlock:
@@ -727,33 +831,55 @@ class NonSourceParser(YamlDocsReader, Generic[NonSourceTarget, Parsed]):
def parse(self) -> List[TestBlock]:
node: NonSourceTarget
test_blocks: List[TestBlock] = []
# get list of 'node' objects
# UnparsedNodeUpdate (TestablePatchParser, models, seeds, snapshots)
# = HasColumnTests, HasTests
# UnparsedAnalysisUpdate (UnparsedAnalysisParser, analyses)
# = HasColumnDocs, HasDocs
# UnparsedMacroUpdate (MacroPatchParser, 'macros')
# = HasDocs
# correspond to this parser's 'key'
for node in self.get_unparsed_target():
# node_block is a TargetBlock (Macro or Analysis)
# or a TestBlock (all of the others)
node_block = self.get_block(node)
if isinstance(node_block, TestBlock):
# TestablePatchParser = models, seeds, snapshots
test_blocks.append(node_block)
if isinstance(node, (HasColumnDocs, HasColumnTests)):
# UnparsedNodeUpdate and UnparsedAnalysisUpdate
refs: ParserRef = ParserRef.from_target(node)
else:
refs = ParserRef()
# This adds the node_block to self.manifest
# as a ParsedNodePatch or ParsedMacroPatch
self.parse_patch(node_block, refs)
return test_blocks
def get_unparsed_target(self) -> Iterable[NonSourceTarget]:
path = self.yaml.path.original_file_path
for data in self.get_key_dicts():
# get verified list of dicts for the 'key' that this
# parser handles
key_dicts = self.get_key_dicts()
for data in key_dicts:
# add extra data to each dict. This updates the dicts
# in the parser yaml
data.update({
'original_file_path': path,
'yaml_key': self.key,
'package_name': self.project.project_name,
})
try:
model = self._target_type().from_dict(data)
# target_type: UnparsedNodeUpdate, UnparsedAnalysisUpdate,
# or UnparsedMacroUpdate
self._target_type().validate(data)
node = self._target_type().from_dict(data)
except (ValidationError, JSONValidationException) as exc:
msg = error_context(path, self.key, data, exc)
raise CompilationException(msg) from exc
else:
yield model
yield node
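For example, a 'models' entry might reach _target_type() as a dict like the following; only the last three keys are added by get_unparsed_target(), and all values here are hypothetical:

data = {
    "name": "my_model",
    "description": "A model documented in schema.yml",
    "columns": [{"name": "id", "tests": ["unique", "not_null"]}],
    # added by get_unparsed_target():
    "original_file_path": "models/schema.yml",
    "yaml_key": "models",
    "package_name": "my_project",
}
# The parser then validates and instantiates the target type, e.g.:
#   UnparsedNodeUpdate.validate(data)
#   node = UnparsedNodeUpdate.from_dict(data)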
class NodePatchParser(
@@ -763,7 +889,7 @@ class NodePatchParser(
def parse_patch(
self, block: TargetBlock[NodeTarget], refs: ParserRef
) -> None:
result = ParsedNodePatch(
patch = ParsedNodePatch(
name=block.target.name,
original_file_path=block.target.original_file_path,
yaml_key=block.target.yaml_key,
@@ -773,7 +899,7 @@ class NodePatchParser(
meta=block.target.meta,
docs=block.target.docs,
)
self.results.add_patch(self.yaml.file, result)
self.manifest.add_patch(self.yaml.file, patch)
class TestablePatchParser(NodePatchParser[UnparsedNodeUpdate]):
@@ -802,7 +928,7 @@ class MacroPatchParser(NonSourceParser[UnparsedMacroUpdate, ParsedMacroPatch]):
def parse_patch(
self, block: TargetBlock[UnparsedMacroUpdate], refs: ParserRef
) -> None:
result = ParsedMacroPatch(
patch = ParsedMacroPatch(
name=block.target.name,
original_file_path=block.target.original_file_path,
yaml_key=block.target.yaml_key,
@@ -812,7 +938,7 @@ class MacroPatchParser(NonSourceParser[UnparsedMacroUpdate, ParsedMacroPatch]):
meta=block.target.meta,
docs=block.target.docs,
)
self.results.add_macro_patch(self.yaml.file, result)
self.manifest.add_macro_patch(self.yaml.file, patch)
class ExposureParser(YamlReader):
@@ -846,7 +972,7 @@ class ExposureParser(YamlReader):
ctx = generate_parse_exposure(
parsed,
self.root_project,
self.schema_parser.macro_manifest,
self.schema_parser.manifest,
package_name,
)
depends_on_jinja = '\n'.join(
@@ -861,6 +987,7 @@ class ExposureParser(YamlReader):
def parse(self) -> Iterable[ParsedExposure]:
for data in self.get_key_dicts():
try:
UnparsedExposure.validate(data)
unparsed = UnparsedExposure.from_dict(data)
except (ValidationError, JSONValidationException) as exc:
msg = error_context(self.yaml.path, self.key, data, exc)

View File

@@ -11,6 +11,8 @@ from dbt.contracts.files import SourceFile, FilePath
from dbt.exceptions import CompilationException, InternalException
# What's the point of wrapping a SourceFile with this class?
# Could it be removed?
@dataclass
class FileBlock:
file: SourceFile
@@ -30,6 +32,9 @@ class FileBlock:
return self.file.path
# The BlockTag is used in Jinja processing
# Why do we have different classes where the only
# difference is what 'contents' returns?
@dataclass
class BlockContents(FileBlock):
file: SourceFile # if you remove this, mypy will get upset

View File

@@ -1,19 +1,15 @@
from dbt.context.context_config import ContextConfig
from dbt.contracts.files import SourceFile, FilePath
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.node_types import NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FileBlock, FilesystemSearcher
from dbt.parser.search import FileBlock
class SeedParser(SimpleSQLParser[ParsedSeedNode]):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.data_paths, '.csv'
)
def parse_from_dict(self, dct, validate=True) -> ParsedSeedNode:
return ParsedSeedNode.from_dict(dct, validate=validate)
if validate:
ParsedSeedNode.validate(dct)
return ParsedSeedNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:
@@ -27,13 +23,3 @@ class SeedParser(SimpleSQLParser[ParsedSeedNode]):
self, parsed_node: ParsedSeedNode, config: ContextConfig
) -> None:
"""Seeds don't need to do any rendering."""
def load_file(
self, match: FilePath, *, set_contents: bool = False
) -> SourceFile:
if match.seed_too_large():
# We don't want to calculate a hash of this file. Use the path.
return SourceFile.big_seed(match)
else:
# We want to calculate a hash, but we don't need the contents
return super().load_file(match, set_contents=set_contents)

View File

@@ -1,7 +1,7 @@
import os
from typing import List
from hologram import ValidationError
from dbt.dataclass_schema import ValidationError
from dbt.contracts.graph.parsed import (
IntermediateSnapshotNode, ParsedSnapshotNode
@@ -12,7 +12,7 @@ from dbt.exceptions import (
from dbt.node_types import NodeType
from dbt.parser.base import SQLParser
from dbt.parser.search import (
FilesystemSearcher, BlockContents, BlockSearcher, FileBlock
BlockContents, BlockSearcher, FileBlock
)
from dbt.utils import split_path
@@ -20,13 +20,10 @@ from dbt.utils import split_path
class SnapshotParser(
SQLParser[IntermediateSnapshotNode, ParsedSnapshotNode]
):
def get_paths(self):
return FilesystemSearcher(
self.project, self.project.snapshot_paths, '.sql'
)
def parse_from_dict(self, dct, validate=True) -> IntermediateSnapshotNode:
return IntermediateSnapshotNode.from_dict(dct, validate=validate)
if validate:
IntermediateSnapshotNode.validate(dct)
return IntermediateSnapshotNode.from_dict(dct)
@property
def resource_type(self) -> NodeType:
@@ -66,7 +63,8 @@ class SnapshotParser(
def transform(self, node: IntermediateSnapshotNode) -> ParsedSnapshotNode:
try:
parsed_node = ParsedSnapshotNode.from_dict(node.to_dict())
dct = node.to_dict(omit_none=True)
parsed_node = ParsedSnapshotNode.from_dict(dct)
self.set_snapshot_attributes(parsed_node)
return parsed_node
except ValidationError as exc:
@@ -83,4 +81,4 @@ class SnapshotParser(
# in case there are no snapshots declared, we still want to mark this
# file as seen. But after we've finished, because we don't want to add
# files with syntax errors
self.results.get_file(file_block.file)
self.manifest.get_file(file_block.file)

View File

@@ -4,6 +4,7 @@ from typing import (
Dict,
Optional,
Set,
Union,
)
from dbt.config import RuntimeConfig
from dbt.contracts.graph.manifest import Manifest, SourceKey
@@ -21,22 +22,17 @@ from dbt.contracts.graph.unparsed import (
from dbt.exceptions import warn_or_error
from dbt.parser.schemas import SchemaParser, ParserRef
from dbt.parser.results import ParseResult
from dbt import ui
class SourcePatcher:
def __init__(
self,
results: ParseResult,
root_project: RuntimeConfig,
manifest: Manifest,
) -> None:
self.results = results
self.root_project = root_project
self.macro_manifest = Manifest.from_macros(
macros=self.results.macros,
files=self.results.files
)
self.manifest = manifest
self.schema_parsers: Dict[str, SchemaParser] = {}
self.patches_used: Dict[SourceKey, Set[str]] = {}
self.sources: Dict[str, ParsedSourceDefinition] = {}
@@ -49,8 +45,8 @@ class SourcePatcher:
if patch is None:
return unpatched
source_dct = unpatched.source.to_dict()
table_dct = unpatched.table.to_dict()
source_dct = unpatched.source.to_dict(omit_none=True)
table_dct = unpatched.table.to_dict(omit_none=True)
patch_path: Optional[Path] = None
source_table_patch: Optional[SourceTablePatch] = None
@@ -85,7 +81,7 @@ class SourcePatcher:
all_projects = self.root_project.load_dependencies()
project = all_projects[package_name]
schema_parser = SchemaParser(
self.results, project, self.root_project, self.macro_manifest
project, self.manifest, self.root_project
)
self.schema_parsers[package_name] = schema_parser
return schema_parser
@@ -103,10 +99,12 @@ class SourcePatcher:
def get_patch_for(
self,
unpatched: UnpatchedSourceDefinition,
unpatched: Union[UnpatchedSourceDefinition, ParsedSourceDefinition],
) -> Optional[SourcePatch]:
if isinstance(unpatched, ParsedSourceDefinition):
return None
key = (unpatched.package_name, unpatched.source.name)
patch: Optional[SourcePatch] = self.results.source_patches.get(key)
patch: Optional[SourcePatch] = self.manifest.source_patches.get(key)
if patch is None:
return None
if key not in self.patches_used:
@@ -119,7 +117,9 @@ class SourcePatcher:
def construct_sources(self) -> None:
# given the UnpatchedSourceDefinition and SourcePatches, combine them
# to make a beautiful baby ParsedSourceDefinition.
for unique_id, unpatched in self.results.sources.items():
for unique_id, unpatched in self.manifest.sources.items():
if isinstance(unpatched, ParsedSourceDefinition):
continue
patch = self.get_patch_for(unpatched)
patched = self.patch_source(unpatched, patch)
@@ -127,22 +127,22 @@ class SourcePatcher:
# data.
for test in self.get_source_tests(patched):
if test.config.enabled:
self.results.add_node_nofile(test)
self.manifest.add_node_nofile(test)
else:
self.results.add_disabled_nofile(test)
self.manifest.add_disabled_nofile(test)
schema_parser = self.get_schema_parser_for(unpatched.package_name)
parsed = schema_parser.parse_source(patched)
if parsed.config.enabled:
self.sources[unique_id] = parsed
else:
self.results.add_disabled_nofile(parsed)
self.manifest.add_disabled_nofile(parsed)
self.warn_unused()
def warn_unused(self) -> None:
unused_tables: Dict[SourceKey, Optional[Set[str]]] = {}
for patch in self.results.source_patches.values():
for patch in self.manifest.source_patches.values():
key = (patch.overrides, patch.name)
if key not in self.patches_used:
unused_tables[key] = None
@@ -168,7 +168,7 @@ class SourcePatcher:
'target:',
]
for key, table_names in unused_tables.items():
patch = self.results.source_patches[key]
patch = self.manifest.source_patches[key]
patch_name = f'{patch.overrides}.{patch.name}'
if table_names is None:
msg.append(
@@ -185,8 +185,8 @@ class SourcePatcher:
def patch_sources(
results: ParseResult,
root_project: RuntimeConfig,
manifest: Manifest,
) -> Dict[str, ParsedSourceDefinition]:
"""Patch all the sources found in the results. Updates results.disabled and
results.nodes.
@@ -194,6 +194,6 @@ def patch_sources(
Return a dict of ParsedSourceDefinitions, suitable for use in
manifest.sources.
"""
patcher = SourcePatcher(results, root_project)
patcher = SourcePatcher(root_project, manifest)
patcher.construct_sources()
return patcher.sources
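A hypothetical call site for the updated API (root_project is a RuntimeConfig and manifest a Manifest; the real caller lives in manifest loading and is not shown in this diff):

from dbt.parser.sources import SourcePatcher, patch_sources  # assumed module path

patcher = SourcePatcher(root_project, manifest)
patcher.construct_sources()
parsed = patcher.sources  # Dict[str, ParsedSourceDefinition]

# equivalently, via the module-level helper above:
parsed = patch_sources(root_project, manifest)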

View File

@@ -1,32 +0,0 @@
"""A collection of performance-enhancing functions that have to know just a
little bit too much to go anywhere else.
"""
from dbt.adapters.factory import get_adapter
from dbt.parser.manifest import load_manifest
from dbt.contracts.graph.manifest import Manifest
from dbt.config import RuntimeConfig
def get_full_manifest(
config: RuntimeConfig,
*,
reset: bool = False,
) -> Manifest:
"""Load the full manifest, using the adapter's internal manifest if it
exists to skip parsing internal (dbt + plugins) macros a second time.
Also, make sure that we force-load the adapter's manifest, so it gets
attached to the adapter for any methods that need it.
"""
adapter = get_adapter(config) # type: ignore
if reset:
config.clear_dependencies()
adapter.clear_macro_manifest()
internal: Manifest = adapter.load_macro_manifest()
return load_manifest(
config,
internal,
adapter.connections.set_query_header,
)
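The deleted helper is superseded by a classmethod on ManifestLoader; a rough before/after, with the new import path taken from the task-manager change later in this diff:

# Before (dbt.perf_utils, removed above):
#   from dbt.perf_utils import get_full_manifest
#   manifest = get_full_manifest(config, reset=True)

# After:
from dbt.parser.manifest import ManifestLoader

manifest = ManifestLoader.get_full_manifest(config, reset=True)  # config: RuntimeConfig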

View File

@@ -177,7 +177,7 @@ def poll_complete(
def _dict_logs(logs: List[LogMessage]) -> List[Dict[str, Any]]:
return [log.to_dict() for log in logs]
return [log.to_dict(omit_none=True) for log in logs]
class Poll(RemoteBuiltinMethod[PollParameters, PollResult]):

View File

@@ -1,8 +1,7 @@
import logbook
import logbook.queues
from jsonrpc.exceptions import JSONRPCError
from hologram import JsonSchemaMixin
from hologram.helpers import StrEnum
from dbt.dataclass_schema import StrEnum
from dataclasses import dataclass, field
from datetime import datetime, timedelta
@@ -25,8 +24,11 @@ class QueueMessageType(StrEnum):
terminating = frozenset((Error, Result, Timeout))
# This class was subclassed from JsonSchemaMixin, but it
# doesn't appear to be necessary, and Mashumaro does not
# handle logbook.LogRecord
@dataclass
class QueueMessage(JsonSchemaMixin):
class QueueMessage:
message_type: QueueMessageType

View File

@@ -3,7 +3,7 @@ from abc import abstractmethod
from copy import deepcopy
from typing import List, Optional, Type, TypeVar, Generic, Dict, Any
from hologram import JsonSchemaMixin, ValidationError
from dbt.dataclass_schema import dbtClassMixin, ValidationError
from dbt.contracts.rpc import RPCParameters, RemoteResult, RemoteMethodFlags
from dbt.exceptions import NotImplementedException, InternalException
@@ -109,7 +109,7 @@ class RemoteBuiltinMethod(RemoteMethod[Parameters, Result]):
'the run() method on builtins should never be called'
)
def __call__(self, **kwargs: Dict[str, Any]) -> JsonSchemaMixin:
def __call__(self, **kwargs: Dict[str, Any]) -> dbtClassMixin:
try:
params = self.get_parameters().from_dict(kwargs)
except ValidationError as exc:

View File

@@ -1,7 +1,7 @@
import json
from typing import Callable, Dict, Any
from hologram import JsonSchemaMixin
from dbt.dataclass_schema import dbtClassMixin
from jsonrpc.exceptions import (
JSONRPCParseError,
JSONRPCInvalidRequestException,
@@ -90,11 +90,14 @@ class ResponseManager(JSONRPCResponseManager):
@classmethod
def _get_responses(cls, requests, dispatcher):
for output in super()._get_responses(requests, dispatcher):
# if it's a result, check if it's a JsonSchemaMixin and if so call
# if it's a result, check if it's a dbtClassMixin and if so call
# to_dict
if hasattr(output, 'result'):
if isinstance(output.result, JsonSchemaMixin):
output.result = output.result.to_dict(omit_none=False)
if isinstance(output.result, dbtClassMixin):
# Note: errors in to_dict do not show up anywhere in
# the output and all you get is a generic 500 error
output.result = \
output.result.to_dict(omit_none=False)
yield output
@classmethod

View File

@@ -9,7 +9,7 @@ from typing import (
)
from typing_extensions import Protocol
from hologram import JsonSchemaMixin, ValidationError
from dbt.dataclass_schema import dbtClassMixin, ValidationError
import dbt.exceptions
import dbt.flags
@@ -187,6 +187,7 @@ def get_results_context(
class StateHandler:
"""A helper context manager to manage task handler state."""
def __init__(self, task_handler: 'RequestTaskHandler') -> None:
self.handler = task_handler
@@ -248,6 +249,7 @@ class SetArgsStateHandler(StateHandler):
"""A state handler that does not touch state on success and does not
execute the teardown
"""
def handle_completed(self):
pass
@@ -257,6 +259,7 @@ class SetArgsStateHandler(StateHandler):
class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
"""Handler for the single task triggered by a given jsonrpc request."""
def __init__(
self,
manager: TaskManagerProtocol,
@@ -280,7 +283,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
# - The actual thread that this represents, which writes its data to
# the result and logs. The atomicity of list.append() and item
# assignment means we don't need a lock.
self.result: Optional[JsonSchemaMixin] = None
self.result: Optional[dbtClassMixin] = None
self.error: Optional[RPCException] = None
self.state: TaskHandlerState = TaskHandlerState.NotStarted
self.logs: List[LogMessage] = []
@@ -388,7 +391,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
except RPCException as exc:
# RPC Exceptions come already preserialized for the jsonrpc
# framework
exc.logs = [log.to_dict() for log in self.logs]
exc.logs = [log.to_dict(omit_none=True) for log in self.logs]
exc.tags = self.tags
raise
@@ -400,6 +403,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
try:
with StateHandler(self):
self.result = self.get_result()
except (dbt.exceptions.Exception, RPCException):
# we probably got an error after the RPC call ran (and it was
# probably deps...). By now anyone who wanted to see it has seen it
@@ -449,6 +453,7 @@ class RequestTaskHandler(threading.Thread, TaskHandlerProtocol):
)
try:
cls.validate(self.task_kwargs)
return cls.from_dict(self.task_kwargs)
except ValidationError as exc:
# raise a TypeError to indicate invalid parameters so we get a nice

View File

@@ -14,11 +14,11 @@ from dbt.contracts.rpc import (
class TaskHandlerProtocol(Protocol):
started: Optional[datetime]
ended: Optional[datetime]
state: TaskHandlerState
task_id: TaskID
process: Optional[multiprocessing.Process]
state: TaskHandlerState
started: Optional[datetime] = None
ended: Optional[datetime] = None
process: Optional[multiprocessing.Process] = None
@property
def request_id(self) -> Union[str, int]:

View File

@@ -19,7 +19,7 @@ from dbt.contracts.rpc import (
TaskID,
)
from dbt.logger import LogMessage, list_handler
from dbt.perf_utils import get_full_manifest
from dbt.parser.manifest import ManifestLoader
from dbt.rpc.error import dbt_error
from dbt.rpc.gc import GarbageCollector
from dbt.rpc.task_handler_protocol import TaskHandlerProtocol, TaskHandlerMap
@@ -187,7 +187,7 @@ class TaskManager:
return True
def parse_manifest(self) -> None:
self.manifest = get_full_manifest(self.config, reset=True)
self.manifest = ManifestLoader.get_full_manifest(self.config, reset=True)
def set_compile_exception(self, exc, logs=List[LogMessage]) -> None:
assert self.last_parse.state == ManifestStatus.Compiling, \

View File

@@ -4,8 +4,7 @@ import re
from dbt.exceptions import VersionsNotCompatibleException
import dbt.utils
from hologram import JsonSchemaMixin
from hologram.helpers import StrEnum
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from typing import Optional
@@ -18,12 +17,12 @@ class Matchers(StrEnum):
@dataclass
class VersionSpecification(JsonSchemaMixin):
major: Optional[str]
minor: Optional[str]
patch: Optional[str]
prerelease: Optional[str]
build: Optional[str]
class VersionSpecification(dbtClassMixin):
major: Optional[str] = None
minor: Optional[str] = None
patch: Optional[str] = None
prerelease: Optional[str] = None
build: Optional[str] = None
matcher: Matchers = Matchers.EXACT
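With the new None defaults, a partially specified version can be constructed directly; a small illustrative example:

spec = VersionSpecification(major="0", minor="19")
assert spec.patch is None and spec.matcher == Matchers.EXACT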

View File

@@ -9,7 +9,7 @@ from dbt import tracking
from dbt import ui
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.results import (
RunModelResult, collect_timing_info
NodeStatus, RunResult, collect_timing_info, RunStatus
)
from dbt.exceptions import (
NotImplementedException, CompilationException, RuntimeException,
@@ -165,6 +165,7 @@ class ExecutionContext:
"""During execution and error handling, dbt makes use of mutable state:
timing information and the newest (compiled vs executed) form of the node.
"""
def __init__(self, node):
self.timing = []
self.node = node
@@ -179,20 +180,20 @@ class BaseRunner(metaclass=ABCMeta):
self.num_nodes = num_nodes
self.skip = False
self.skip_cause: Optional[RunModelResult] = None
self.skip_cause: Optional[RunResult] = None
@abstractmethod
def compile(self, manifest: Manifest) -> Any:
pass
def get_result_status(self, result) -> Dict[str, str]:
if result.error:
return {'node_status': 'error', 'node_error': str(result.error)}
elif result.skip:
if result.status == NodeStatus.Error:
return {'node_status': 'error', 'node_error': str(result.message)}
elif result.status == NodeStatus.Skipped:
return {'node_status': 'skipped'}
elif result.fail:
elif result.status == NodeStatus.Fail:
return {'node_status': 'failed'}
elif result.warn:
elif result.status == NodeStatus.Warn:
return {'node_status': 'warn'}
else:
return {'node_status': 'passed'}
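The old boolean flags map onto the new status values roughly as follows (an illustrative summary of get_result_status above):

from dbt.contracts.results import NodeStatus  # import path per the diff above

FLAG_TO_STATUS = {
    "error": NodeStatus.Error,
    "skip": NodeStatus.Skipped,
    "fail": NodeStatus.Fail,
    "warn": NodeStatus.Warn,
}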
@@ -212,52 +213,62 @@ class BaseRunner(metaclass=ABCMeta):
return result
def _build_run_result(self, node, start_time, error, status, timing_info,
skip=False, fail=None, warn=None, agate_table=None):
def _build_run_result(self, node, start_time, status, timing_info, message,
agate_table=None, adapter_response=None):
execution_time = time.time() - start_time
thread_id = threading.current_thread().name
return RunModelResult(
node=node,
error=error,
skip=skip,
if adapter_response is None:
adapter_response = {}
return RunResult(
status=status,
fail=fail,
warn=warn,
execution_time=execution_time,
thread_id=thread_id,
execution_time=execution_time,
timing=timing_info,
message=message,
node=node,
agate_table=agate_table,
adapter_response=adapter_response
)
def error_result(self, node, error, start_time, timing_info):
def error_result(self, node, message, start_time, timing_info):
return self._build_run_result(
node=node,
start_time=start_time,
error=error,
status='ERROR',
timing_info=timing_info
status=RunStatus.Error,
timing_info=timing_info,
message=message,
)
def ephemeral_result(self, node, start_time, timing_info):
return self._build_run_result(
node=node,
start_time=start_time,
error=None,
status=None,
timing_info=timing_info
status=RunStatus.Success,
timing_info=timing_info,
message=None
)
def from_run_result(self, result, start_time, timing_info):
return self._build_run_result(
node=result.node,
start_time=start_time,
error=result.error,
skip=result.skip,
status=result.status,
fail=result.fail,
warn=result.warn,
timing_info=timing_info,
message=result.message,
agate_table=result.agate_table,
adapter_response=result.adapter_response
)
def skip_result(self, node, message):
thread_id = threading.current_thread().name
return RunResult(
status=RunStatus.Skipped,
thread_id=thread_id,
execution_time=0,
timing=[],
message=message,
node=node,
adapter_response={}
)
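Putting the new result shape together: a result now carries an explicit status and message instead of separate error/skip/fail/warn flags. A hypothetical construction, with field names taken from _build_run_result() and skip_result() above:

from dbt.contracts.results import RunResult, RunStatus  # per the import change above

result = RunResult(
    status=RunStatus.Success,
    thread_id="Thread-1",
    execution_time=1.23,
    timing=[],
    message=None,
    node=node,              # the executed node (assumed to be in scope)
    agate_table=None,
    adapter_response={},
)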
def compile_and_execute(self, manifest, ctx):
@@ -340,7 +351,7 @@ class BaseRunner(metaclass=ABCMeta):
# an error
if (
exc_str is not None and result is not None and
result.error is None and error is None
result.status != NodeStatus.Error and error is None
):
error = exc_str
@@ -389,7 +400,7 @@ class BaseRunner(metaclass=ABCMeta):
schema_name = self.node.schema
node_name = self.node.name
error = None
error_message = None
if not self.node.is_ephemeral_model:
# if this model was skipped due to an upstream ephemeral model
# failure, print a special 'error skip' message.
@@ -408,7 +419,7 @@ class BaseRunner(metaclass=ABCMeta):
'an ephemeral failure'
)
# set an error so dbt will exit with an error code
error = (
error_message = (
'Compilation Error in {}, caused by compilation error '
'in referenced ephemeral model {}'
.format(self.node.unique_id,
@@ -423,7 +434,7 @@ class BaseRunner(metaclass=ABCMeta):
self.num_nodes
)
node_result = RunModelResult(self.node, skip=True, error=error)
node_result = self.skip_result(self.node, error_message)
return node_result
def do_skip(self, cause=None):

Some files were not shown because too many files have changed in this diff.