Compare commits

...

63 Commits

Author SHA1 Message Date
Jeremy Cohen
2d14ee7aaa Try RotatingFileHandler 2021-11-09 20:05:57 +01:00
Nathaniel May
31acb95d7a rebased on main and added new partial parsing event 2021-11-09 11:40:18 -05:00
Nathaniel May
683190b711 fixes 2021-11-09 11:26:01 -05:00
Nathaniel May
ebb84c404f postgres adapter to use new logger 2021-11-09 11:26:01 -05:00
Nathaniel May
2ca6ce688b whitespace change 2021-11-09 11:26:01 -05:00
Nathaniel May
a40550b89d std logger for structured logging (#4231)
structured logging powered by the stdlib logger

Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
Co-authored-by: Ian Knox <81931810+iknox-fa@users.noreply.github.com>
2021-11-09 11:26:01 -05:00
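
The two commits above ("Try RotatingFileHandler" and "std logger for structured logging") describe routing dbt's log output through Python's standard-library logging machinery. A minimal, hedged sketch of that setup, using only stdlib names (the "dbt.log" path, size limit, and format string are illustrative choices, not dbt's actual configuration):

import logging
from logging.handlers import RotatingFileHandler

# A stdlib logger with a size-capped, rotating file handler.
logger = logging.getLogger("dbt")
logger.setLevel(logging.DEBUG)

handler = RotatingFileHandler(
    "dbt.log",                  # illustrative path
    maxBytes=10 * 1024 * 1024,  # rotate after ~10 MB
    backupCount=5,              # keep five old log files
)
handler.setFormatter(
    logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
)
logger.addHandler(handler)

logger.debug("structured event rendered to text goes here")

RotatingFileHandler caps the file size and keeps a fixed number of old files, which is the usual reason to prefer it over a plain FileHandler for a long-lived CLI log.
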
Ian Knox
b2aea11cdb Struct log for adapter call sites (#4189)
graph call sites for structured logging

Co-authored-by: Nathaniel May <nathaniel.may@fishtownanalytics.com>
Co-authored-by: Emily Rockman <emily.rockman@dbtlabs.com>
2021-11-09 11:26:01 -05:00
Emily Rockman
43b39fd1aa removed redundant timestamp (#4239) 2021-11-09 11:26:01 -05:00
Emily Rockman
5cc8626e96 updates associated with merging main
- removed 3 new log call sites and replaced with structured logs
- removed 2 unused struct logs
2021-11-09 11:26:01 -05:00
Nathaniel May
f95e9efbc0 use event types in main even before the logger is set up. (#4219) 2021-11-09 11:26:01 -05:00
Nathaniel May
25c974af8c lazy logging in event module (#4210)
* switches on debug level to guard against expensive messages

* adds memoization to msg construction
2021-11-09 11:26:01 -05:00
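
The two bullets in #4210 above describe standard techniques: only build an expensive log message when debug output is actually enabled, and memoize the rendered message so it is constructed at most once. A stdlib-only sketch of both ideas; `dump_graph` and `render_msg` are placeholder names, not dbt's API:

import logging
from functools import lru_cache

logger = logging.getLogger("dbt")

def dump_graph() -> str:
    # Placeholder for an expensive serialization of internal state.
    return "...large graph dump..."

@lru_cache(maxsize=None)
def render_msg() -> str:
    # Memoized: the expensive string is constructed at most once.
    return f"graph state: {dump_graph()}"

def log_graph_dump() -> None:
    # Guard: skip message construction entirely unless DEBUG is enabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(render_msg())
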
Emily Rockman
b5c6f09a9e remove unused import (#4217) 2021-11-09 11:26:01 -05:00
Emily Rockman
bd3e623240 test/integration call sites (#4209)
* added struct logging to base

* fixed merge weirdness

* convert to use single type for integration tests

* converted to 3 reusable test types in sep module

* tweak message

* clean up and making test_types complete for future

* fix missed import
2021-11-09 11:26:01 -05:00
Emily Rockman
63343653a9 trivial logger removal (#4216) 2021-11-09 11:26:01 -05:00
Emily Rockman
d8b97c1077 call sites in core/dbt (excluding main.py) (#4202)
* add struct logging to compilation

* add struct logging to tracking

* add struct logging to utils

* add struct logging to exceptions

* fixed some misc errors

* updated to send raw ex, removed resulting circ dep
2021-11-09 11:26:01 -05:00
Emily Rockman
e0b0edaeed deps call sites (#4199)
* add struct logging to base

* add struct logging to git

* add struct logging to deps

* remove blank line

* fixed stray merge error
2021-11-09 11:26:01 -05:00
Emily Rockman
3cafc9e13f task callsites: part 2 (#4188)
* add struct logging to docs serve

* remove merge fluff

* struct logging to seed command

* converting print to use structured logging

* more structured logging print conversion

* pulling apart formatting more

* added struct logging by dissecting printer.py

* add struct logging to runnable

* add struct logging to task init

* fixed formatting

* more formatting and moving things around
2021-11-09 11:26:01 -05:00
Nathaniel May
13f31aed90 scrub the secrets (#4203)
scrub secrets in event module
2021-11-09 11:26:01 -05:00
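
The "scrub secrets in event module" change (#4203) implies replacing known secret values in a message before it reaches the logs. A simplified sketch of that idea; the `DBT_ENV_SECRET_` prefix is used here only as an assumed convention for marking secret environment variables:

import os
from typing import List

SECRET_PREFIX = "DBT_ENV_SECRET_"  # assumed prefix marking secret env vars

def env_secrets() -> List[str]:
    # Collect the values of environment variables treated as secrets.
    return [v for k, v in os.environ.items() if k.startswith(SECRET_PREFIX) and v]

def scrub_secrets(msg: str) -> str:
    # Replace each secret value with a fixed placeholder before the message
    # is handed to the logger or embedded in an exception.
    for secret in env_secrets():
        msg = msg.replace(secret, "*****")
    return msg
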
Nathaniel May
d513491046 Show Exception should trigger a stack trace (#4190) 2021-11-09 11:26:01 -05:00
Emily Rockman
281d2491a5 task call sites part 1 (#4183)
* add struct logging to base.py

* struct logging in run_operation

* add struct logging to base

* add struct logging to clean

* add struct logging to debug

* add struct logging to deps

* fix errors

* add struct logging to run.py

* fixed flake error

* add struct logging to generate

* added debug level stack trace

* fixed flake error

* added struct logging to compile

* added struct logging to freshness

* cleaned up errors

* resolved bug that broke everything

* removed accidental import

* fixed bug with unused args
2021-11-09 11:26:01 -05:00
Emily Rockman
9857e1dd83 parser call sites (#4177)
* convert generic_test to structured logging

* convert macros to structured logging

* add struct logging to most of manifest.py

* add struct logging to models.py

* added struct logging to partial.py

* finished conversion of manifest

* fixing errors

* fixed 1 todo and added another

* fixed bugs from merge
2021-11-09 11:26:01 -05:00
Emily Rockman
6b36b18029 config call sites (#4169)
* update config use structured logging

* WIP

* minor cleanup

* fixed merge error

* added in ShowException

* added todo to remove defaults after dropping 3.6

* removed todo that is obsolete
2021-11-09 11:26:01 -05:00
Ian Knox
d8868c5197 Dataclass compatibility (#4180)
* use __post_init__() instead of fake dataclass member vars
2021-11-09 11:26:01 -05:00
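
PR #4180's summary points at a small dataclass idiom: compute derived attributes in `__post_init__()` instead of declaring them as fake member variables on the class. An illustrative sketch (the event name and fields are invented for the example):

import time
from dataclasses import dataclass, field

@dataclass
class ConnectionEvent:
    conn_name: str
    conn_type: str
    # Derived value: excluded from __init__ rather than faked as a field default.
    ts: float = field(init=False)

    def __post_init__(self) -> None:
        # Populate the derived field once the declared fields are set.
        self.ts = time.time()

event = ConnectionEvent(conn_name="master", conn_type="postgres")
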
Emily Rockman
b141620125 contracts call sites (#4166)
* first pass adding structured logging
2021-11-09 11:26:01 -05:00
Emily Rockman
51d8440dd4 Change Graph logger call sites (#4165)
graph call sites for structured logging
2021-11-09 11:26:01 -05:00
Nathaniel May
5b2562a919 Client call sites (#4163)
update log call sites with new event system
2021-11-09 11:26:01 -05:00
Nathaniel May
44a9da621e Handle exec info (#4168)
handle exec info
2021-11-09 11:26:01 -05:00
Emily Rockman
69aa6bf964 context call sites (#4164)
* updated context dir to new structured logging
2021-11-09 11:26:01 -05:00
Nathaniel May
f9ef9da110 Initial structured logging work with fire_event (#4137)
add event type modeling and fire_event calls
2021-11-09 11:26:01 -05:00
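
The `fire_event` pattern introduced in #4137, visible throughout the diffs further down (for example `fire_event(NewConnection(conn_name=..., conn_type=...))`), replaces string-formatted log calls with typed event objects that know how to render themselves. A stripped-down sketch of the shape of that API, not dbt's actual event module:

import logging
from dataclasses import dataclass

logger = logging.getLogger("dbt")

@dataclass
class Event:
    # Base type: every event renders its own human-readable message.
    def message(self) -> str:
        raise NotImplementedError

@dataclass
class NewConnection(Event):
    conn_name: str
    conn_type: str

    def message(self) -> str:
        return f'Acquiring new {self.conn_type} connection "{self.conn_name}".'

def fire_event(event: Event) -> None:
    # One call site: structured data in, formatted log line out.
    logger.debug(event.message())

fire_event(NewConnection(conn_name="master", conn_type="postgres"))
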
Nathaniel May
57ae9180c2 init 2021-11-09 11:26:01 -05:00
Jeremy Cohen
efe926d20c Change user instead of pass (#4250) 2021-11-09 13:10:34 +01:00
Jeremy Cohen
1081b8e720 Improve error msg on pip install dbt (#4244) 2021-11-09 10:40:45 +01:00
Kyle Wigley
8205921c4b Update docs (#4241)
Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
2021-11-08 19:47:28 -05:00
Jeremy Cohen
da6c211611 Wrap get_batch_size() in return() (#4240) 2021-11-09 00:46:15 +01:00
Jeremy Cohen
354c1e0d4d Rm py36 tests, pkg metadata, bump reqs (#4223) 2021-11-09 00:19:09 +01:00
Gerda Shank
855419d698 [#4071] Add metrics feature (#4235)
* first cut at supporting metrics definitions

* teach dbt about metrics

* wip

* support partial parsing for metrics

* working on tests

* Fix some tests

* Add partial parsing metrics test

* Fix some more tests

* Update CHANGELOG.md

* Fix partial parsing yaml file to correct model syntax

Co-authored-by: Drew Banin <drew@fishtownanalytics.com>
2021-11-08 17:44:01 -05:00
Gerda Shank
e94fd61b24 Issue message instead of exception when patch does not have a matching node (#4236) 2021-11-08 15:35:14 -05:00
Kyle Wigley
4cf9b73c3d Raise parsing error instead of compilation when extracting test args (#4237) 2021-11-08 14:51:52 -05:00
Jeremy Cohen
8442fb66a5 Reorganize global project (macros) (#4154)
* Add integration tests

* Reorganize + dispatch more global macros

* Reorg materializations subdir

* Move around + document generic tests

* Fix failing tests

* Fix merge conflict

* Grab fix from #4148

* PR feedback

* Fixup

* Add load_relation back, it was nice

* Last few test fixes

* Rm incremental_upsert, now unused

* Add changelog entry
2021-11-08 19:09:54 +01:00
dependabot[bot]
f8cefa3eff Update agate requirement from <1.6.2,>=1.6 to >=1.6,<1.6.4 in /core (#3585)
Updates the requirements on [agate](https://github.com/wireservice/agate) to permit the latest version.
- [Release notes](https://github.com/wireservice/agate/releases)
- [Changelog](https://github.com/wireservice/agate/blob/master/CHANGELOG.rst)
- [Commits](https://github.com/wireservice/agate/compare/1.6.0...1.6.3)

---
updated-dependencies:
- dependency-name: agate
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-08 18:41:04 +01:00
dependabot[bot]
d83e0fb8d8 Bump mashumaro from 2.5 to 2.9 in /core (#4193)
Bumps [mashumaro](https://github.com/Fatal1ty/mashumaro) from 2.5 to 2.9.
- [Release notes](https://github.com/Fatal1ty/mashumaro/releases)
- [Commits](https://github.com/Fatal1ty/mashumaro/compare/v2.5...v2.9)

---
updated-dependencies:
- dependency-name: mashumaro
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2021-11-08 18:40:25 +01:00
Gerda Shank
3e9da06365 [#3885] Skip partial parsing if project env vars change (#4212)
* [#3885] Skip partial parsing if project env vars change

* Support env_vars in the profile
2021-11-08 11:51:38 -05:00
Gerda Shank
bda70c988e [#3885] Partially parse when environment variables in schema files change (#4162)
* [#3885] Partially parse when environment variables in schema files
change

* Add documentation for test kwargs

* Add test and fix for schema configs with env_var
2021-11-08 11:28:43 -05:00
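
The env_var partial-parsing work in #4101, #4162, and #4212 boils down to remembering which environment variables each file consumed, and with which values, so a later run can tell whether that file must be re-parsed. A toy sketch of the bookkeeping, with made-up structure names:

import os
from typing import Dict, List

# Hypothetical record from the previous parse:
# file path -> {env var name -> value it had when the file was parsed}
env_vars_by_file: Dict[str, Dict[str, str]] = {
    "models/schema.yml": {"MY_SCHEMA": "analytics"},
}

def files_needing_reparse() -> List[str]:
    # A file must be re-parsed if any env var it used has changed or vanished.
    stale = []
    for path, used in env_vars_by_file.items():
        if any(os.getenv(name) != value for name, value in used.items()):
            stale.append(path)
    return stale
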
Rachel
229e897070 Clears adapters before registering to fix dbt-server caching behavior (#4218) 2021-11-08 10:33:39 -05:00
Benoit Perigaud
f20e83a32b Fix/dbt deps retry none answer (#4225)
* Fix issue #4178
Allow retries when the answer is None

* Include fix for #4178
Allow retries when the answer from dbt deps is None

* Add link to the PR

* Update exception and shorten line size

* Add test when dbt deps returns None
2021-11-08 12:30:38 +01:00
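
#4225 makes `dbt deps` retry when the package registry answer comes back as None instead of failing outright; the registry hunk further down raises a `ContentDecodingError` for a None response so an outer retry wrapper can catch it. A generic sketch of that retry-on-bad-payload idea, with an illustrative function name and a simple fixed pause rather than dbt's actual retry helper:

import time
import requests

def get_package_index(url, attempts=5):
    # Retry when the request fails or when the decoded payload is None.
    last_error = RuntimeError("no attempts made")
    for _ in range(attempts):
        try:
            resp = requests.get(url, timeout=30)
            resp.raise_for_status()
            data = resp.json()
            if data is None:
                raise requests.exceptions.ContentDecodingError(
                    "Request error: the response is None"
                )
            return data
        except requests.exceptions.RequestException as exc:
            last_error = exc
            time.sleep(1)  # simple fixed pause between attempts
    raise last_error
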
Jeremy Cohen
dd84f9a896 Raise error on pip install dbt (#4133)
* Raise error on pip install dbt

* Fix relative path logic

* Do not build dist for dbt

* Fix long descriptions

* Trigger code checks

* Using root readme more trouble than good

* only fail on install, not build

* Edit dist script. Avoid README duplication

* jk, be less clever

* Ignore 'dbt' source distribution when testing

* Add changelog entry

Co-authored-by: Kyle Wigley <kyle@dbtlabs.com>
2021-11-07 17:55:30 +01:00
Mila Page
e6df4266f6 Parser no longer takes greedy. Accepts indirect selection, a bool. (#4104)
* Parser no longer takes greedy. Accepts indirect selection, a bool.

* Remove references to greedy and supporting functions.

* 1. Set testing flag default to True. 2. Improve arg parsing.

* Update tests and add new case for when flag unset.

* Update names and styling to fit test requirements. Add default value for option.

* Correct several failing tests now that default behavior was flipped.

* Tests expect eager on by default.

* All but selector test passing.

* Get integration tests working, add them, and mix in selector syntax.

* Clean code and correct test.

* Add changelog entry

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
2021-11-07 17:41:56 +01:00
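
#4104 replaces the old `greedy` flag with an eager/cautious `indirect_selection` switch, described more fully in the changelog hunk below: under eager selection a test is included if any node it depends on is selected, under cautious selection only if all of them are. A small sketch of that rule, using plain sets instead of dbt's graph types:

from typing import Dict, Set

def select_tests(
    selected_nodes: Set[str],
    test_parents: Dict[str, Set[str]],
    eager: bool = True,
) -> Set[str]:
    # eager: any selected parent pulls the test in (pre-v0.20 behaviour).
    # cautious: every parent must be selected (v0.20/v0.21 behaviour).
    picked = set()
    for test, parents in test_parents.items():
        if eager and parents & selected_nodes:
            picked.add(test)
        elif not eager and parents and parents <= selected_nodes:
            picked.add(test)
    return picked

# A relationships test depending on two models is only picked up under
# cautious selection when both models are selected.
tests = {"relationships_a_b": {"model_a", "model_b"}}
assert select_tests({"model_a"}, tests, eager=True) == {"relationships_a_b"}
assert select_tests({"model_a"}, tests, eager=False) == set()
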
Christophe Oudar
b591e1a2b7 Use common columns for incremental schema changes (#4170)
* Use common columns for incremental schema changes

* on_schema_change:append_new_columns should gracefully handle column removal

* review changes

* Lean approach for `process_schema_changes` to simplify

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
2021-11-07 17:31:30 +01:00
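
#4170's point, echoed in the changelog entry further down ("pass common columns as dest_columns in incremental merge macros"), is that when `on_schema_change` is set the merge should only name columns present in both the existing table and the new query results. A schematic version of that intersection, using plain strings rather than dbt Column objects:

from typing import List

def common_columns(dest_columns: List[str], source_columns: List[str]) -> List[str]:
    # Keep destination order, but only columns that also exist in the source,
    # so the merge statement never references a column missing on either side.
    source = {c.lower() for c in source_columns}
    return [c for c in dest_columns if c.lower() in source]

# Example: a column dropped from the model no longer appears in the merge.
print(common_columns(["id", "name", "legacy_flag"], ["id", "name", "new_col"]))
# -> ['id', 'name']
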
Jeremy Cohen
3dab058c73 incorporate_indirect_nodes should pass if not needed (#4214)
* Pass incorporate_indirect_nodes if not needed

* Fix flake8

* Add changelog entry
2021-11-05 16:55:58 +01:00
Robert
c7bc6eb812 Add error surfacing for git cloning errors (#4124)
* Add error surfacing for git cloning errors

* Update CHANGELOG.md

* Fix formatting and remove redundant except: raise

* Turn error handling for duplicate packages back on
2021-11-05 10:12:07 +01:00
Jeremy Cohen
c690ecc1fd Fixup changelog (#4206) 2021-11-04 13:54:23 +01:00
Jeremy Cohen
73e272f06e Add get_where_subquery to test namespace (#4197)
* Add get_where_subquery to test namespace

* Add integration test

* Fix test, add comment, smarter approach

* Fix unit tests

* Add changelog entry
2021-11-04 11:53:28 +01:00
leahwicz
95d087b51b Bumping artifact versions for v1 (#4191)
* Bumping artifact versions for v1

* Adding schema in Changelog

* Update CHANGELOG.md

* Update changelog entry

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
2021-11-04 11:19:36 +01:00
Jeremy Cohen
40ae6b6bc8 Any subset, strict or not (#4160) 2021-11-02 17:59:46 +01:00
Jeremy Cohen
fe20534a98 Add extra graph edges for build only (#4143)
* Resolve extra graph edges for build only

* Fix flake8

* Change test to reflect functional change

* Rename method + args. Add changelog entry
2021-11-02 17:41:14 +01:00
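
#4143 adds test-to-node edges only for the `build` task; the compile.py hunk below spells out the rule: if everything an upstream test depends on is already upstream of a node, run the test before that node. A dictionary-based sketch of that subset check (plain dicts and sets here, whereas dbt itself works on a networkx graph):

from typing import Dict, List, Set, Tuple

def extra_test_edges(
    upstream: Dict[str, Set[str]],      # node -> all of its upstream node ids
    test_parents: Dict[str, Set[str]],  # test -> nodes the test depends on
) -> List[Tuple[str, str]]:
    # Add (test -> node) edges so the build task waits for a test whenever
    # the test's dependencies are all upstream of the node anyway.
    edges = []
    for node, ancestors in upstream.items():
        for test, parents in test_parents.items():
            if parents and parents.issubset(ancestors):
                edges.append((test, node))
    return edges

# model1 --> model2, with test1 on model1: build gains an edge test1 -> model2.
print(extra_test_edges({"model2": {"model1"}}, {"test1": {"model1"}}))
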
leahwicz
dd7af477ac Perf improvement to subgraph selection (#4155)
Perf improvement to get_subset_graph
Co-authored-by: Ian Knox <ian.knox@fishtownanalytics.com>
2021-10-29 16:06:09 -05:00
Jeremy Cohen
178f74b753 Fix comma if only removing columns in on_schema_change: sync_all_columns (#4148)
* Fix comma if only removing in on_schema_change: sync

* Add changelog entry
2021-10-28 10:19:05 +02:00
Emily Rockman
a14f563ec8 port error scrub from 0.21.latests up for main (#4145) 2021-10-27 14:06:22 -05:00
Kyle Wigley
ff109e1806 Expose lib to run tasks and compile/execute sql (#4111) 2021-10-27 13:30:46 -04:00
Frank Cash
5e46694b68 assertRaisesRegexp => assertRaisesRegex (#4136)
* assertRaisesRegexp => assertRaisesRegex

* Update CHANGELOG.md

* Update CHANGELOG.md
2021-10-27 13:05:24 +02:00
Gerda Shank
73af9a56e5 [#3885] Handle env_vars in partial parsing of SQL files (#4101)
* [#3885] Handle env_vars in partial parsing

* Comment method to build env_vars_to_source_files
2021-10-26 11:16:36 -04:00
kadero
d2aa920275 Feature: nullable error_after in source (#3955)
* Add nullable error after feature

* add merge_error_after method

* Fix FreshnessThreshold merged test

* Fix other tests

* Fix merge error after

* Fix test docs generate integration test

* Fix source integration test

* Typo and fix linting.

* Fix mypy test

* More terse way to express merge_freshness_time_thresholds

* Update Changelog.md

* Add integration test

* Fix conflict

* Fix contributing.md

* Fix integration tests

* Move up changelog entry

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
2021-10-26 15:23:57 +02:00
Gerda Shank
c34f3530c8 Use platform agnostic code when searching generic test directory (#4131) 2021-10-25 22:46:36 -04:00
204 changed files with 7762 additions and 3076 deletions


@@ -1,6 +1,6 @@
module.exports = ({ context }) => {
const defaultPythonVersion = "3.8";
const supportedPythonVersions = ["3.6", "3.7", "3.8", "3.9"];
const supportedPythonVersions = ["3.7", "3.8", "3.9"];
const supportedAdapters = ["postgres"];
// if PR, generate matrix based on files changed and PR labels


@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.6, 3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
python-version: [3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
env:
TOXENV: "unit"
@@ -167,7 +167,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: [3.7, 3.8, 3.9]
steps:
- name: Set up Python ${{ matrix.python-version }}
@@ -198,8 +198,9 @@ jobs:
dbt --version
- name: Install source distributions
# ignore dbt-1.0.0, which intentionally raises an error when installed from source
run: |
find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
find ./dist/dbt-[a-z]*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |


@@ -1,5 +1,45 @@
## dbt-core 1.0.0 (Release TBD)
### Breaking changes
- Replace `greedy` flag/property for test selection with `indirect_selection: eager/cautious` flag/property. Set to `eager` by default. **Note:** This reverts test selection to its pre-v0.20 behavior by default. `dbt test -s my_model` _will_ select multi-parent tests, such as `relationships`, that depend on unselected resources. To achieve the behavior change in v0.20 + v0.21, set `--indirect-selection=cautious` on the CLI or `indirect_selection: cautious` in yaml selectors. ([#4082](https://github.com/dbt-labs/dbt-core/issues/4082), [#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- In v1.0.0, **`pip install dbt` will raise an explicit error.** Instead, please use `pip install dbt-<adapter>` (to use dbt with that database adapter), or `pip install dbt-core` (for core functionality). For parity with the previous behavior of `pip install dbt`, you can use: `pip install dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery` ([#4100](https://github.com/dbt-labs/dbt-core/issues/4100), [#4133](https://github.com/dbt-labs/dbt-core/pull/4133))
- Reorganize the `global_project` (macros) into smaller files with clearer names. Remove unused global macros: `column_list`, `column_list_for_create_table`, `incremental_upsert` ([#4154](https://github.com/dbt-labs/dbt-core/pull/4154))
### Features
- Allow nullable `error_after` in source freshness ([#3874](https://github.com/dbt-labs/dbt-core/issues/3874), [#3955](https://github.com/dbt-labs/dbt-core/pull/3955))
- Increase performance of graph subset selection ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135),[#4155](https://github.com/dbt-labs/dbt-core/pull/4155))
- Speed up node selection by skipping indirect node incorporation when not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213),[#4214](https://github.com/dbt-labs/dbt-core/pull/4214))
- Add metrics nodes ([#4071](https://github.com/dbt-labs/dbt-core/issues/4071), [#4235](https://github.com/dbt-labs/dbt-core/pull/4235))
### Fixes
- Changes unit tests using `assertRaisesRegexp` to `assertRaisesRegex` ([#4136](https://github.com/dbt-labs/dbt-core/issues/4132), [#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- Allow retries when the answer from a `dbt deps` is `None` ([#4178](https://github.com/dbt-labs/dbt-core/issues/4178))
### Docs
- Fix non-alphabetical sort of Source Tables in source overview page ([docs#81](https://github.com/dbt-labs/dbt-docs/issues/81), [docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- Add title tag to node elements in tree ([docs#202](https://github.com/dbt-labs/dbt-docs/issues/202), [docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
- Account for test rename: `schema` → `generic`, `data` → `singular`. Use `test_metadata` instead of `schema`/`data` tags to differentiate ([docs#216](https://github.com/dbt-labs/dbt-docs/issues/216), [docs#222](https://github.com/dbt-labs/dbt-docs/pull/222))
- Add metrics ([core#216](https://github.com/dbt-labs/dbt-core/issues/4235), [docs#223](https://github.com/dbt-labs/dbt-docs/pull/223))
### Under the hood
- Bump artifact schema versions for 1.0.0: manifest v4, run results v4, sources v3. Notable changes: schema test + data test nodes are renamed to generic test + singular test nodes; freshness threshold default values ([#4191](https://github.com/dbt-labs/dbt-core/pull/4191))
- Speed up node selection by skipping `incorporate_indirect_nodes` if not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213), [#4214](https://github.com/dbt-labs/dbt-core/issues/4214))
- When on_schema_change is set, pass common columns as dest_columns in incremental merge macros ([#4144](https://github.com/dbt-labs/dbt-core/issues/4144), [#4170](https://github.com/dbt-labs/dbt-core/pull/4170))
- Clear adapters before registering in lib module config generation ([#4218](https://github.com/dbt-labs/dbt-core/pull/4218))
- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#4223](https://github.com/dbt-labs/dbt-core/pull/4223))
Contributors:
- [@kadero](https://github.com/kadero) ([#3955](https://github.com/dbt-labs/dbt-core/pull/3955))
- [@frankcash](https://github.com/frankcash) ([#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- [@Kayrnt](https://github.com/Kayrnt) ([#4136](https://github.com/dbt-labs/dbt-core/pull/4170))
- [@VersusFacit](https://github.com/VersusFacit) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@joellabes](https://github.com/joellabes) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@b-per](https://github.com/b-per) ([#4225](https://github.com/dbt-labs/dbt-core/pull/4225))
- [@salmonsd](https://github.com/salmonsd) ([docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- [@miike](https://github.com/miike) ([docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
## dbt-core 1.0.0b2 (October 25, 2021)
### Breaking changes
@@ -15,12 +55,16 @@
- `dbt init` is now interactive, generating profiles.yml when run inside existing project ([#3625](https://github.com/dbt-labs/dbt/pull/3625))
### Under the hood
- Fix intermittent errors in partial parsing tests ([#4060](https://github.com/dbt-labs/dbt-core/issues/4060), [#4068](https://github.com/dbt-labs/dbt-core/pull/4068))
- Make finding disabled nodes more consistent ([#4069](https://github.com/dbt-labs/dbt-core/issues/4069), [#4073](https://github.com/dbt-labs/dbt-core/pull/4073))
- Remove connection from `render_with_context` during parsing, thereby removing misleading log message ([#3137](https://github.com/dbt-labs/dbt-core/issues/3137), [#4062](https://github.com/dbt-labs/dbt-core/pull/4062))
- Wait for postgres docker container to be ready in `setup_db.sh`. ([#3876](https://github.com/dbt-labs/dbt-core/issues/3876), [#3908](https://github.com/dbt-labs/dbt-core/pull/3908))
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Dependency updates ([#4079](https://github.com/dbt-labs/dbt-core/pull/4079)), ([#3532](https://github.com/dbt-labs/dbt-core/pull/3532)
- Schedule partial parsing for SQL files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4101](https://github.com/dbt-labs/dbt-core/pull/4101))
- Schedule partial parsing for schema files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4162](https://github.com/dbt-labs/dbt-core/pull/4162))
- Skip partial parsing when env_vars change in dbt_project or profile ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4212](https://github.com/dbt-labs/dbt-core/pull/4212))
Contributors:
- [@sungchun12](https://github.com/sungchun12) ([#4017](https://github.com/dbt-labs/dbt/pull/4017))
@@ -54,7 +98,6 @@ Contributors:
- Fixed bug with `error_if` test option ([#4070](https://github.com/dbt-labs/dbt-core/pull/4070))
### Under the hood
- Enact deprecation for `materialization-return` and replace deprecation warning with an exception. ([#3896](https://github.com/dbt-labs/dbt-core/issues/3896))
- Build catalog for only relational, non-ephemeral nodes in the graph ([#3920](https://github.com/dbt-labs/dbt-core/issues/3920))
- Enact deprecation to remove the `release` arg from the `execute_macro` method. ([#3900](https://github.com/dbt-labs/dbt-core/issues/3900))
@@ -67,6 +110,7 @@ Contributors:
- Update the default project paths to be `analysis-paths = ['analyses']` and `test-paths = ['tests]`. Also have starter project set `analysis-paths: ['analyses']` from now on. ([#2659](https://github.com/dbt-labs/dbt-core/issues/2659))
- Define the data type of `sources` as an array of arrays of string in the manifest artifacts. ([#3966](https://github.com/dbt-labs/dbt-core/issues/3966), [#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- Marked `source-paths` and `data-paths` as deprecated keys in `dbt_project.yml` in favor of `model-paths` and `seed-paths` respectively.([#1607](https://github.com/dbt-labs/dbt-core/issues/1607))
- Surface git errors to `stdout` when cloning dbt packages from Github. ([#3167](https://github.com/dbt-labs/dbt-core/issues/3167))
Contributors:
@@ -75,14 +119,25 @@ Contributors:
- [@samlader](https://github.com/samlader) ([#3993](https://github.com/dbt-labs/dbt-core/pull/3993))
- [@yu-iskw](https://github.com/yu-iskw) ([#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- [@laxjesse](https://github.com/laxjesse) ([#4019](https://github.com/dbt-labs/dbt-core/pull/4019))
- [@gitznik](https://github.com/Gitznik) ([#4124](https://github.com/dbt-labs/dbt-core/pull/4124))
## dbt 0.21.1 (Release TBD)
### Fixes
- Add `get_where_subquery` to test macro namespace, fixing custom generic tests that rely on introspecting the `model` arg at parse time ([#4195](https://github.com/dbt-labs/dbt/issues/4195), [#4197](https://github.com/dbt-labs/dbt/pull/4197))
## dbt 0.21.1rc1 (November 03, 2021)
### Fixes
- Performance: Use child_map to find tests for nodes in resolve_graph ([#4012](https://github.com/dbt-labs/dbt/issues/4012), [#4022](https://github.com/dbt-labs/dbt/pull/4022))
- Switch `unique_field` from abstractproperty to optional property. Add docstring ([#4025](https://github.com/dbt-labs/dbt/issues/4025), [#4028](https://github.com/dbt-labs/dbt/pull/4028))
- Include only relational nodes in `database_schema_set` ([#4063](https://github.com/dbt-labs/dbt-core/issues/4063), [#4077](https://github.com/dbt-labs/dbt-core/pull/4077))
- Added support for tests on databases that lack real boolean types. ([#4084](https://github.com/dbt-labs/dbt-core/issues/4084))
- Scrub secrets coming from `CommandError`s so they don't get exposed in logs. ([#4138](https://github.com/dbt-labs/dbt-core/pull/4138))
- Syntax fix in `alter_relation_add_remove_columns` if only removing columns in `on_schema_change: sync_all_columns` ([#4147](https://github.com/dbt-labs/dbt-core/issues/4147))
- Increase performance of graph subset selection ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135),[#4155](https://github.com/dbt-labs/dbt-core/pull/4155))
- Add downstream test edges for `build` task _only_. Restore previous graph construction, compilation performance, and node selection behavior (`test+`) for all other tasks ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135), [#4143](https://github.com/dbt-labs/dbt-core/pull/4143))
- Don't require a strict/proper subset when adding testing edges to specialized graph for `build` ([#4158](https://github.com/dbt-labs/dbt-core/issues/4135), [#4158](https://github.com/dbt-labs/dbt-core/pull/4160))
Contributors:
- [@ljhopkins2](https://github.com/ljhopkins2) ([#4077](https://github.com/dbt-labs/dbt-core/pull/4077))

core/README.md (new file, 42 lines)

@@ -0,0 +1,42 @@
<p align="center">
<img src="https://raw.githubusercontent.com/dbt-labs/dbt-core/fa1ea14ddfb1d5ae319d5141844910dd53ab2834/etc/dbt-core.svg" alt="dbt logo" width="750"/>
</p>
<p align="center">
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml/badge.svg?event=push" alt="Unit Tests Badge"/>
</a>
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml/badge.svg?event=push" alt="Integration Tests Badge"/>
</a>
</p>
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
![architecture](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-arch.png)
## Understanding dbt
Analysts using dbt can transform their data by simply writing select statements, while dbt handles turning these statements into tables and views in a data warehouse.
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
![dbt dag](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-dag.png)
## Getting started
- [Install dbt](https://docs.getdbt.com/docs/installation)
- Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/)
## Join the dbt Community
- Be part of the conversation in the [dbt Community Slack](http://community.getdbt.com/)
- Read more on the [dbt Community Discourse](https://discourse.getdbt.com)
## Reporting bugs and contributing code
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-core/issues/new)
- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md)
## Code of Conduct
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct).


@@ -18,7 +18,17 @@ from dbt.contracts.graph.manifest import Manifest
from dbt.adapters.base.query_headers import (
MacroQueryStringSetter,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import (
NewConnection,
ConnectionReused,
ConnectionLeftOpen,
ConnectionLeftOpen2,
ConnectionClosed,
ConnectionClosed2,
Rollback,
RollbackFailed
)
from dbt import flags
@@ -136,14 +146,10 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
if conn.name == conn_name and conn.state == 'open':
return conn
logger.debug(
'Acquiring new {} connection "{}".'.format(self.TYPE, conn_name))
fire_event(NewConnection(conn_name=conn_name, conn_type=self.TYPE))
if conn.state == 'open':
logger.debug(
'Re-using an available connection from the pool (formerly {}).'
.format(conn.name)
)
fire_event(ConnectionReused(conn_name=conn_name))
else:
conn.handle = LazyHandle(self.open)
@@ -190,11 +196,9 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
with self.lock:
for connection in self.thread_connections.values():
if connection.state not in {'closed', 'init'}:
logger.debug("Connection '{}' was left open."
.format(connection.name))
fire_event(ConnectionLeftOpen(conn_name=connection.name))
else:
logger.debug("Connection '{}' was properly closed."
.format(connection.name))
fire_event(ConnectionClosed(conn_name=connection.name))
self.close(connection)
# garbage collect these connections
@@ -220,20 +224,17 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
try:
connection.handle.rollback()
except Exception:
logger.debug(
'Failed to rollback {}'.format(connection.name),
exc_info=True
)
fire_event(RollbackFailed(conn_name=connection.name))
@classmethod
def _close_handle(cls, connection: Connection) -> None:
"""Perform the actual close operation."""
# On windows, sometimes connection handles don't have a close() attr.
if hasattr(connection.handle, 'close'):
logger.debug(f'On {connection.name}: Close')
fire_event(ConnectionClosed2(conn_name=connection.name))
connection.handle.close()
else:
logger.debug(f'On {connection.name}: No close available on handle')
fire_event(ConnectionLeftOpen2(conn_name=connection.name))
@classmethod
def _rollback(cls, connection: Connection) -> None:
@@ -244,7 +245,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
f'"{connection.name}", but it does not have one open!'
)
logger.debug(f'On {connection.name}: ROLLBACK')
fire_event(Rollback(conn_name=connection.name))
cls._rollback_handle(connection)
connection.transaction_open = False
@@ -256,7 +257,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
return connection
if connection.transaction_open and connection.handle:
logger.debug('On {}: ROLLBACK'.format(connection.name))
fire_event(Rollback(conn_name=connection.name))
cls._rollback_handle(connection)
connection.transaction_open = False


@@ -29,7 +29,8 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.manifest import Manifest, MacroManifest
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.exceptions import warn_or_error
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import CacheMiss, ListRelations
from dbt.utils import filter_null_values, executor
from dbt.adapters.base.connections import Connection, AdapterResponse
@@ -288,9 +289,12 @@ class BaseAdapter(metaclass=AdapterMeta):
"""Check if the schema is cached, and by default logs if it is not."""
if (database, schema) not in self.cache:
logger.debug(
'On "{}": cache miss for schema "{}.{}", this is inefficient'
.format(self.nice_connection_name(), database, schema)
fire_event(
CacheMiss(
conn_name=self.nice_connection_name,
database=database,
schema=schema
)
)
return False
else:
@@ -672,9 +676,8 @@ class BaseAdapter(metaclass=AdapterMeta):
relations = self.list_relations_without_caching(
schema_relation
)
fire_event(ListRelations(database=database, schema=schema, relations=relations))
logger.debug('with database={}, schema={}, relations={}'
.format(database, schema, relations))
return relations
def _make_match_kwargs(
@@ -968,10 +971,10 @@ class BaseAdapter(metaclass=AdapterMeta):
'dbt could not find a macro with the name "{}" in {}'
.format(macro_name, package_name)
)
# This causes a reference cycle, as generate_runtime_macro()
# This causes a reference cycle, as generate_runtime_macro_context()
# ends up calling get_adapter, so the import has to be here.
from dbt.context.providers import generate_runtime_macro
macro_context = generate_runtime_macro(
from dbt.context.providers import generate_runtime_macro_context
macro_context = generate_runtime_macro_context(
macro=macro,
config=self.config,
manifest=manifest,


@@ -1,11 +1,26 @@
import threading
from collections import namedtuple
from copy import deepcopy
from typing import List, Iterable, Optional, Dict, Set, Tuple, Any
import threading
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from dbt.logger import CACHE_LOGGER as logger
from dbt.utils import lowercase
import dbt.exceptions
from dbt.events.functions import fire_event
from dbt.events.types import (
AddLink,
AddRelation,
DropCascade,
DropMissingRelation,
DropRelation,
DumpAfterAddGraph,
DumpAfterRenameSchema,
DumpBeforeAddGraph,
DumpBeforeRenameSchema,
RenameSchema,
TemporaryRelation,
UncachedRelation,
UpdateReference
)
from dbt.utils import lowercase
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
@@ -157,12 +172,6 @@ class _CachedRelation:
return [dot_separated(r) for r in self.referenced_by]
def lazy_log(msg, func):
if logger.disabled:
return
logger.debug(msg.format(func()))
class RelationsCache:
"""A cache of the relations known to dbt. Keeps track of relationships
declared between tables and handles renames/drops as a real database would.
@@ -278,6 +287,7 @@ class RelationsCache:
referenced.add_reference(dependent)
# TODO: Is this dead code? I can't seem to find it grepping the codebase.
def add_link(self, referenced, dependent):
"""Add a link between two relations to the database. If either relation
does not exist, it will be added as an "external" relation.
@@ -297,11 +307,7 @@ class RelationsCache:
# if we have not cached the referenced schema at all, we must be
# referring to a table outside our control. There's no need to make
# a link - we will never drop the referenced relation during a run.
logger.debug(
'{dep!s} references {ref!s} but {ref.database}.{ref.schema} '
'is not in the cache, skipping assumed external relation'
.format(dep=dependent, ref=ref_key)
)
fire_event(UncachedRelation(dep_key=dependent, ref_key=ref_key))
return
if ref_key not in self.relations:
# Insert a dummy "external" relation.
@@ -317,9 +323,7 @@ class RelationsCache:
type=referenced.External
)
self.add(dependent)
logger.debug(
'adding link, {!s} references {!s}'.format(dep_key, ref_key)
)
fire_event(AddLink(dep_key=dep_key, ref_key=ref_key))
with self.lock:
self._add_link(ref_key, dep_key)
@@ -330,14 +334,12 @@ class RelationsCache:
:param BaseRelation relation: The underlying relation.
"""
cached = _CachedRelation(relation)
logger.debug('Adding relation: {!s}'.format(cached))
lazy_log('before adding: {!s}', self.dump_graph)
fire_event(AddRelation(relation=cached))
fire_event(DumpBeforeAddGraph(graph_func=self.dump_graph))
with self.lock:
self._setdefault(cached)
lazy_log('after adding: {!s}', self.dump_graph)
fire_event(DumpAfterAddGraph(graph_func=self.dump_graph))
def _remove_refs(self, keys):
"""Removes all references to all entries in keys. This does not
@@ -359,13 +361,10 @@ class RelationsCache:
:param _CachedRelation dropped: An existing _CachedRelation to drop.
"""
if dropped not in self.relations:
logger.debug('dropped a nonexistent relationship: {!s}'
.format(dropped))
fire_event(DropMissingRelation(relation=dropped))
return
consequences = self.relations[dropped].collect_consequences()
logger.debug(
'drop {} is cascading to {}'.format(dropped, consequences)
)
fire_event(DropCascade(dropped=dropped, consequences=consequences))
self._remove_refs(consequences)
def drop(self, relation):
@@ -380,7 +379,7 @@ class RelationsCache:
:param str identifier: The identifier of the relation to drop.
"""
dropped = _make_key(relation)
logger.debug('Dropping relation: {!s}'.format(dropped))
fire_event(DropRelation(dropped=dropped))
with self.lock:
self._drop_cascade_relation(dropped)
@@ -403,9 +402,8 @@ class RelationsCache:
# update all the relations that refer to it
for cached in self.relations.values():
if cached.is_referenced_by(old_key):
logger.debug(
'updated reference from {0} -> {2} to {1} -> {2}'
.format(old_key, new_key, cached.key())
fire_event(
UpdateReference(old_key=old_key, new_key=new_key, cached_key=cached.key())
)
cached.rename_key(old_key, new_key)
@@ -435,10 +433,7 @@ class RelationsCache:
)
if old_key not in self.relations:
logger.debug(
'old key {} not found in self.relations, assuming temporary'
.format(old_key)
)
fire_event(TemporaryRelation(key=old_key))
return False
return True
@@ -456,11 +451,9 @@ class RelationsCache:
"""
old_key = _make_key(old)
new_key = _make_key(new)
logger.debug('Renaming relation {!s} to {!s}'.format(
old_key, new_key
))
fire_event(RenameSchema(old_key=old_key, new_key=new_key))
lazy_log('before rename: {!s}', self.dump_graph)
fire_event(DumpBeforeRenameSchema(graph_func=self.dump_graph))
with self.lock:
if self._check_rename_constraints(old_key, new_key):
@@ -468,7 +461,7 @@ class RelationsCache:
else:
self._setdefault(_CachedRelation(new))
lazy_log('after rename: {!s}', self.dump_graph)
fire_event(DumpAfterRenameSchema(graph_func=self.dump_graph))
def get_relations(
self, database: Optional[str], schema: Optional[str]


@@ -8,10 +8,9 @@ from dbt.include.global_project import (
PACKAGE_PATH as GLOBAL_PROJECT_PATH,
PROJECT_NAME as GLOBAL_PROJECT_NAME,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import AdapterImportError, PluginLoadError
from dbt.contracts.connection import Credentials, AdapterRequiredConfig
from dbt.adapters.protocol import (
AdapterProtocol,
AdapterConfig,
@@ -67,11 +66,12 @@ class AdapterContainer:
# if we failed to import the target module in particular, inform
# the user about it via a runtime error
if exc.name == 'dbt.adapters.' + name:
fire_event(AdapterImportError(exc=exc))
raise RuntimeException(f'Could not find adapter type {name}!')
logger.info(f'Error importing adapter: {exc}')
# otherwise, the error had to have come from some underlying
# library. Log the stack trace.
logger.debug('', exc_info=True)
fire_event(PluginLoadError())
raise
plugin: AdapterPlugin = mod.Plugin
plugin_type = plugin.adapter.type()


@@ -10,7 +10,8 @@ from dbt.adapters.base import BaseConnectionManager
from dbt.contracts.connection import (
Connection, ConnectionState, AdapterResponse
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import ConnectionUsed, SQLQuery, SQLCommit, SQLQueryStatus
class SQLConnectionManager(BaseConnectionManager):
@@ -58,9 +59,7 @@ class SQLConnectionManager(BaseConnectionManager):
connection = self.get_thread_connection()
if auto_begin and connection.transaction_open is False:
self.begin()
logger.debug('Using {} connection "{}".'
.format(self.TYPE, connection.name))
fire_event(ConnectionUsed(conn_type=self.TYPE, conn_name=connection.name))
with self.exception_handler(sql):
if abridge_sql_log:
@@ -68,19 +67,16 @@ class SQLConnectionManager(BaseConnectionManager):
else:
log_sql = sql
logger.debug(
'On {connection_name}: {sql}',
connection_name=connection.name,
sql=log_sql,
)
fire_event(SQLQuery(conn_name=connection.name, sql=log_sql))
pre = time.time()
cursor = connection.handle.cursor()
cursor.execute(sql, bindings)
logger.debug(
"SQL status: {status} in {elapsed:0.2f} seconds",
status=self.get_response(cursor),
elapsed=(time.time() - pre)
fire_event(
SQLQueryStatus(
status=self.get_response(cursor), elapsed=round((time.time() - pre), 2)
)
)
return connection, cursor
@@ -160,7 +156,7 @@ class SQLConnectionManager(BaseConnectionManager):
'Tried to commit transaction on connection "{}", but '
'it does not have one open!'.format(connection.name))
logger.debug('On {}: COMMIT'.format(connection.name))
fire_event(SQLCommit(conn_name=connection.name))
self.add_commit_query()
connection.transaction_open = False


@@ -6,7 +6,9 @@ from dbt.contracts.connection import Connection
import dbt.exceptions
from dbt.adapters.base import BaseAdapter, available
from dbt.adapters.sql import SQLConnectionManager
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import ColTypeChange, SchemaCreation, SchemaDrop
from dbt.adapters.base.relation import BaseRelation
@@ -116,8 +118,13 @@ class SQLAdapter(BaseAdapter):
target_column.can_expand_to(reference_column):
col_string_size = reference_column.string_size()
new_type = self.Column.string_type(col_string_size)
logger.debug("Changing col type from {} to {} in table {}",
target_column.data_type, new_type, current)
fire_event(
ColTypeChange(
orig_type=target_column.data_type,
new_type=new_type,
table=current,
)
)
self.alter_column_type(current, column_name, new_type)
@@ -175,7 +182,7 @@ class SQLAdapter(BaseAdapter):
def create_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
logger.debug('Creating schema "{}"', relation)
fire_event(SchemaCreation(relation=relation))
kwargs = {
'relation': relation,
}
@@ -186,7 +193,7 @@ class SQLAdapter(BaseAdapter):
def drop_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
logger.debug('Dropping schema "{}".', relation)
fire_event(SchemaDrop(relation=relation))
kwargs = {
'relation': relation,
}


@@ -2,7 +2,12 @@ import re
import os.path
from dbt.clients.system import run_cmd, rmdir
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import (
GitSparseCheckoutSubdirectory, GitProgressCheckoutRevision,
GitProgressUpdatingExistingDependency, GitProgressPullingNewDependency,
GitNothingToDo, GitProgressUpdatedCheckoutRange, GitProgressCheckedOutAt
)
import dbt.exceptions
from packaging import version
@@ -12,13 +17,23 @@ def _is_commit(revision: str) -> bool:
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))
def _raise_git_cloning_error(repo, revision, error):
stderr = error.stderr.decode('utf-8').strip()
if 'usage: git' in stderr:
stderr = stderr.split('\nusage: git')[0]
if re.match("fatal: destination path '(.+)' already exists", stderr):
raise error
dbt.exceptions.bad_package_spec(repo, revision, stderr)
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirectory=None):
has_revision = revision is not None
is_commit = _is_commit(revision or "")
clone_cmd = ['git', 'clone', '--depth', '1']
if subdirectory:
logger.debug(' Subdirectory specified: {}, using sparse checkout.'.format(subdirectory))
fire_event(GitSparseCheckoutSubdirectory(subdir=subdirectory))
out, _ = run_cmd(cwd, ['git', '--version'], env={'LC_ALL': 'C'})
git_version = version.parse(re.search(r"\d+\.\d+\.\d+", out.decode("utf-8")).group(0))
if not git_version >= version.parse("2.25.0"):
@@ -36,10 +51,18 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirec
if dirname is not None:
clone_cmd.append(dirname)
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
try:
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
except dbt.exceptions.CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
if subdirectory:
run_cmd(os.path.join(cwd, dirname or ''), ['git', 'sparse-checkout', 'set', subdirectory])
cwd_subdir = os.path.join(cwd, dirname or '')
clone_cmd_subdir = ['git', 'sparse-checkout', 'set', subdirectory]
try:
run_cmd(cwd_subdir, clone_cmd_subdir)
except dbt.exceptions.CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
if remove_git_dir:
rmdir(os.path.join(dirname, '.git'))
@@ -54,7 +77,7 @@ def list_tags(cwd):
def _checkout(cwd, repo, revision):
logger.debug(' Checking out revision {}.'.format(revision))
fire_event(GitProgressCheckoutRevision(revision=revision))
fetch_cmd = ["git", "fetch", "origin", "--depth", "1"]
@@ -111,14 +134,17 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
except dbt.exceptions.CommandResultError as exc:
err = exc.stderr.decode('utf-8')
exists = re.match("fatal: destination path '(.+)' already exists", err)
if not exists: # something else is wrong, raise it
if not exists:
print(
'\nSomething went wrong while cloning {}'.format(repo) +
'\nCheck the debug logs for more information')
raise
directory = None
start_sha = None
if exists:
directory = exists.group(1)
logger.debug('Updating existing dependency {}.', directory)
fire_event(GitProgressUpdatingExistingDependency(dir=directory))
else:
matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
if matches is None:
@@ -126,17 +152,18 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
f'Error cloning {repo} - never saw "Cloning into ..." from git'
)
directory = matches.group(1)
logger.debug('Pulling new dependency {}.', directory)
fire_event(GitProgressPullingNewDependency(dir=directory))
full_path = os.path.join(cwd, directory)
start_sha = get_current_sha(full_path)
checkout(full_path, repo, revision)
end_sha = get_current_sha(full_path)
if exists:
if start_sha == end_sha:
logger.debug(' Already at {}, nothing to do.', start_sha[:7])
fire_event(GitNothingToDo(sha=start_sha[:7]))
else:
logger.debug(' Updated checkout from {} to {}.',
start_sha[:7], end_sha[:7])
fire_event(GitProgressUpdatedCheckoutRange(
start_sha=start_sha[:7], end_sha=end_sha[:7]
))
else:
logger.debug(' Checked out at {}.', end_sha[:7])
fire_event(GitProgressCheckedOutAt(end_sha=end_sha[:7]))
return os.path.join(directory, subdirectory or '')


@@ -21,7 +21,7 @@ import jinja2.sandbox
from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
get_test_macro_name, deep_map
get_test_macro_name, deep_map_render
)
from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
@@ -33,7 +33,6 @@ from dbt.exceptions import (
UndefinedMacroException
)
from dbt import flags
from dbt.logger import GLOBAL_LOGGER as logger # noqa
def _linecache_inject(source, write):
@@ -661,5 +660,7 @@ def add_rendered_test_kwargs(
return value
kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
# The test_metadata.kwargs come from the test builder, and were set
# when the test node was created in _parse_generic_test.
kwargs = deep_map_render(_convert_function, node.test_metadata.kwargs)
context[GENERIC_TEST_KWARGS_NAME] = kwargs


@@ -1,7 +1,11 @@
import functools
import requests
from dbt.events.functions import fire_event
from dbt.events.types import (
RegistryProgressMakingGETRequest,
RegistryProgressGETResponse
)
from dbt.utils import memoized, _connection_exception_retry as connection_exception_retry
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import deprecations
import os
@@ -25,11 +29,14 @@ def _get_with_retries(path, registry_base_url=None):
def _get(path, registry_base_url=None):
url = _get_url(path, registry_base_url)
logger.debug('Making package registry request: GET {}'.format(url))
fire_event(RegistryProgressMakingGETRequest(url=url))
resp = requests.get(url, timeout=30)
logger.debug('Response from registry: GET {} {}'.format(url,
resp.status_code))
fire_event(RegistryProgressGETResponse(url=url, resp_code=resp.status_code))
resp.raise_for_status()
if resp is None:
raise requests.exceptions.ContentDecodingError(
'Request error: The response is None', response=resp
)
return resp.json()


@@ -15,8 +15,12 @@ from typing import (
Type, NoReturn, List, Optional, Dict, Any, Tuple, Callable, Union
)
from dbt.events.functions import fire_event
from dbt.events.types import (
SystemErrorRetrievingModTime, SystemCouldNotWrite, SystemExecutingCmd, SystemStdOutMsg,
SystemStdErrMsg, SystemReportReturnCode
)
import dbt.exceptions
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import _connection_exception_retry as connection_exception_retry
if sys.platform == 'win32':
@@ -65,9 +69,7 @@ def find_matching(
try:
modification_time = os.path.getmtime(absolute_path)
except OSError:
logger.exception(
f"Error retrieving modification time for file {absolute_path}"
)
fire_event(SystemErrorRetrievingModTime(path=absolute_path))
if reobj.match(local_file):
matching.append({
'searched_path': relative_path_to_search,
@@ -161,10 +163,7 @@ def write_file(path: str, contents: str = '') -> bool:
reason = 'Path was possibly too long'
# all our hard work and the path was still too long. Log and
# continue.
logger.debug(
f'Could not write to path {path}({len(path)} characters): '
f'{reason}\nexception: {exc}'
)
fire_event(SystemCouldNotWrite(path=path, reason=reason, exc=exc))
else:
raise
return True
@@ -412,7 +411,7 @@ def _interpret_oserror(exc: OSError, cwd: str, cmd: List[str]) -> NoReturn:
def run_cmd(
cwd: str, cmd: List[str], env: Optional[Dict[str, Any]] = None
) -> Tuple[bytes, bytes]:
logger.debug('Executing "{}"'.format(' '.join(cmd)))
fire_event(SystemExecutingCmd(cmd=cmd))
if len(cmd) == 0:
raise dbt.exceptions.CommandError(cwd, cmd)
@@ -438,11 +437,11 @@ def run_cmd(
except OSError as exc:
_interpret_oserror(exc, cwd, cmd)
logger.debug('STDOUT: "{!s}"'.format(out))
logger.debug('STDERR: "{!s}"'.format(err))
fire_event(SystemStdOutMsg(bmsg=out))
fire_event(SystemStdErrMsg(bmsg=err))
if proc.returncode != 0:
logger.debug('command return code={}'.format(proc.returncode))
fire_event(SystemReportReturnCode(code=proc.returncode))
raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
out, err)


@@ -9,7 +9,7 @@ from dbt import flags
from dbt.adapters.factory import get_adapter
from dbt.clients import jinja
from dbt.clients.system import make_directory
from dbt.context.providers import generate_runtime_model
from dbt.context.providers import generate_runtime_model_context
from dbt.contracts.graph.manifest import Manifest, UniqueID
from dbt.contracts.graph.compiled import (
COMPILED_TYPES,
@@ -26,9 +26,10 @@ from dbt.exceptions import (
RuntimeException,
)
from dbt.graph import Graph
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import FoundStats, CompilingNode, WritingInjectedSQLForNode
from dbt.node_types import NodeType
from dbt.utils import pluralize
from dbt.events.format import pluralize
import dbt.tracking
graph_file_name = 'graph.gpickle'
@@ -53,6 +54,7 @@ def print_compile_stats(stats):
NodeType.Seed: 'seed file',
NodeType.Source: 'source',
NodeType.Exposure: 'exposure',
NodeType.Metric: 'metric'
}
results = {k: 0 for k in names.keys()}
@@ -68,7 +70,7 @@ def print_compile_stats(stats):
if t in names
])
logger.info("Found {}".format(stat_line))
fire_event(FoundStats(stat_line=stat_line))
def _node_enabled(node: ManifestNode):
@@ -178,7 +180,7 @@ class Compiler:
extra_context: Dict[str, Any],
) -> Dict[str, Any]:
context = generate_runtime_model(
context = generate_runtime_model_context(
node, self.config, manifest
)
context.update(extra_context)
@@ -366,7 +368,7 @@ class Compiler:
if extra_context is None:
extra_context = {}
logger.debug("Compiling {}".format(node.unique_id))
fire_event(CompilingNode(unique_id=node.unique_id))
data = node.to_dict(omit_none=True)
data.update({
@@ -418,42 +420,44 @@ class Compiler:
else:
dependency_not_found(node, dependency)
def link_graph(self, linker: Linker, manifest: Manifest):
def link_graph(self, linker: Linker, manifest: Manifest, add_test_edges: bool = False):
for source in manifest.sources.values():
linker.add_node(source.unique_id)
for node in manifest.nodes.values():
self.link_node(linker, node, manifest)
for exposure in manifest.exposures.values():
self.link_node(linker, exposure, manifest)
for metric in manifest.metrics.values():
self.link_node(linker, metric, manifest)
cycle = linker.find_cycles()
if cycle:
raise RuntimeError("Found a cycle: {}".format(cycle))
manifest.build_parent_and_child_maps()
if add_test_edges:
manifest.build_parent_and_child_maps()
self.add_test_edges(linker, manifest)
self.resolve_graph(linker, manifest)
def resolve_graph(self, linker: Linker, manifest: Manifest) -> None:
def add_test_edges(self, linker: Linker, manifest: Manifest) -> None:
""" This method adds additional edges to the DAG. For a given non-test
executable node, add an edge from an upstream test to the given node if
the set of nodes the test depends on is a proper/strict subset of the
upstream nodes for the given node. """
the set of nodes the test depends on is a subset of the upstream nodes
for the given node. """
# Given a graph:
# model1 --> model2 --> model3
# | |
# | \/
# \/ test 2
# | |
# | \/
# \/ test 2
# test1
#
# Produce the following graph:
# model1 --> model2 --> model3
# | | /\ /\
# | \/ | |
# \/ test2 ------- |
# test1 -------------------
# | /\ | /\ /\
# | | \/ | |
# \/ | test2 ----| |
# test1 ----|---------------|
for node_id in linker.graph:
# If node is executable (in manifest.nodes) and does _not_
@@ -491,21 +495,19 @@ class Compiler:
)
# If the set of nodes that an upstream test depends on
# is a proper (or strict) subset of all upstream nodes of
# the current node, add an edge from the upstream test
# to the current node. Must be a proper/strict subset to
# avoid adding a circular dependency to the graph.
if (test_depends_on < upstream_nodes):
# is a subset of all upstream nodes of the current node,
# add an edge from the upstream test to the current node.
if (test_depends_on.issubset(upstream_nodes)):
linker.graph.add_edge(
upstream_test,
node_id
)
def compile(self, manifest: Manifest, write=True) -> Graph:
def compile(self, manifest: Manifest, write=True, add_test_edges=False) -> Graph:
self.initialize()
linker = Linker()
self.link_graph(linker, manifest)
self.link_graph(linker, manifest, add_test_edges)
stats = _generate_stats(manifest)
@@ -520,7 +522,7 @@ class Compiler:
if (not node.extra_ctes_injected or
node.resource_type == NodeType.Snapshot):
return node
logger.debug(f'Writing injected SQL for node "{node.unique_id}"')
fire_event(WritingInjectedSQLForNode(unique_id=node.unique_id))
if node.compiled_sql:
node.compiled_path = node.write_node(


@@ -15,7 +15,8 @@ from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.exceptions import RuntimeException
from dbt.exceptions import validator_error_message
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.types import MissingProfileTarget
from dbt.events.functions import fire_event
from dbt.utils import coerce_dict_str
from .renderer import ProfileRenderer
@@ -91,6 +92,7 @@ class Profile(HasCredentials):
user_config: UserConfig
threads: int
credentials: Credentials
profile_env_vars: Dict[str, Any]
def __init__(
self,
@@ -108,6 +110,7 @@ class Profile(HasCredentials):
self.user_config = user_config
self.threads = threads
self.credentials = credentials
self.profile_env_vars = {} # never available on init
def to_profile_info(
self, serialize_credentials: bool = False
@@ -291,10 +294,7 @@ class Profile(HasCredentials):
target_name = renderer.render_value(raw_profile['target'])
else:
target_name = 'default'
logger.debug(
"target not specified in profile '{}', using '{}'"
.format(profile_name, target_name)
)
fire_event(MissingProfileTarget(profile_name=profile_name, target_name=target_name))
raw_profile_data = cls._get_profile_data(
raw_profile, profile_name, target_name


@@ -284,6 +284,7 @@ class PartialProject(RenderComponents):
selectors_dict=rendered_selectors,
)
# Called by 'collect_parts' in RuntimeConfig
def render(self, renderer: DbtProjectYamlRenderer) -> 'Project':
try:
rendered = self.get_rendered(renderer)
@@ -397,6 +398,8 @@ class PartialProject(RenderComponents):
vars_dict = cfg.vars
vars_value = VarProvider(vars_dict)
# There will never be any project_env_vars when it's first created
project_env_vars: Dict[str, Any] = {}
on_run_start: List[str] = value_or(cfg.on_run_start, [])
on_run_end: List[str] = value_or(cfg.on_run_end, [])
@@ -444,6 +447,7 @@ class PartialProject(RenderComponents):
vars=vars_value,
config_version=cfg.config_version,
unrendered=unrendered,
project_env_vars=project_env_vars,
)
# sanity check - this means an internal issue
project.validate()
@@ -556,6 +560,7 @@ class Project:
query_comment: QueryComment
config_version: int
unrendered: RenderComponents
project_env_vars: Dict[str, Any]
@property
def all_source_paths(self) -> List[str]:
@@ -564,6 +569,13 @@ class Project:
self.analysis_paths, self.macro_paths
)
@property
def generic_test_paths(self):
generic_test_paths = []
for test_path in self.test_paths:
generic_test_paths.append(os.path.join(test_path, 'generic'))
return generic_test_paths
def __str__(self):
cfg = self.to_project_config(with_packages=True)
return str(cfg)
@@ -638,26 +650,6 @@ class Project:
verify_version=verify_version,
)
@classmethod
def render_from_dict(
cls,
project_root: str,
project_dict: Dict[str, Any],
packages_dict: Dict[str, Any],
selectors_dict: Dict[str, Any],
renderer: DbtProjectYamlRenderer,
*,
verify_version: bool = False
) -> 'Project':
partial = PartialProject.from_dicts(
project_root=project_root,
project_dict=project_dict,
packages_dict=packages_dict,
selectors_dict=selectors_dict,
verify_version=verify_version,
)
return partial.render(renderer)
@classmethod
def from_project_root(
cls,

@@ -1,12 +1,13 @@
from typing import Dict, Any, Tuple, Optional, Union, Callable
from dbt.clients.jinja import get_rendered, catch_jinja
from dbt.context.target import TargetContext
from dbt.context.base import BaseContext
from dbt.contracts.connection import HasCredentials
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
from dbt.node_types import NodeType
from dbt.utils import deep_map
from dbt.utils import deep_map_render
Keypath = Tuple[Union[str, int], ...]
@@ -47,7 +48,7 @@ class BaseRenderer:
self, data: Dict[str, Any]
) -> Dict[str, Any]:
try:
return deep_map(self.render_entry, data)
return deep_map_render(self.render_entry, data)
except RecursionException:
raise DbtProjectError(
f'Cycle detected: {self.name} input has a reference to itself',
@@ -99,6 +100,23 @@ class ProjectPostprocessor(Dict[Keypath, Callable[[Any], Any]]):
class DbtProjectYamlRenderer(BaseRenderer):
_KEYPATH_HANDLERS = ProjectPostprocessor()
def __init__(
self, profile: Optional[HasCredentials] = None,
cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars. This is almost always
# a TargetContext, but in the debug task we want a project
# even when we don't have a profile.
if cli_vars is None:
cli_vars = {}
if profile:
self.ctx_obj = TargetContext(profile, cli_vars)
else:
self.ctx_obj = BaseContext(cli_vars) # type:ignore
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
'Project config'
@@ -158,74 +176,23 @@ class DbtProjectYamlRenderer(BaseRenderer):
class ProfileRenderer(BaseRenderer):
def __init__(
self, cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars.
if cli_vars is None:
cli_vars = {}
self.ctx_obj = BaseContext(cli_vars)
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
'Profile'
class SchemaYamlRenderer(BaseRenderer):
DOCUMENTABLE_NODES = frozenset(
n.pluralize() for n in NodeType.documentable()
)
@property
def name(self):
return 'Rendering yaml'
def _is_norender_key(self, keypath: Keypath) -> bool:
"""
models:
- name: blah
- description: blah
tests: ...
- columns:
- name:
- description: blah
tests: ...
Return True if it's tests or description - those aren't rendered
"""
if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
return True
if (
len(keypath) >= 4 and
keypath[1] == 'columns' and
keypath[3] in ('tests', 'description')
):
return True
return False
# don't render descriptions or test keyword arguments
def should_render_keypath(self, keypath: Keypath) -> bool:
if len(keypath) < 2:
return True
if keypath[0] not in self.DOCUMENTABLE_NODES:
return True
if len(keypath) < 3:
return True
if keypath[0] == NodeType.Source.pluralize():
if keypath[2] == 'description':
return False
if keypath[2] == 'tables':
if self._is_norender_key(keypath[3:]):
return False
elif keypath[0] == NodeType.Macro.pluralize():
if keypath[2] == 'arguments':
if self._is_norender_key(keypath[3:]):
return False
elif self._is_norender_key(keypath[1:]):
return False
else: # keypath[0] in self.DOCUMENTABLE_NODES:
if self._is_norender_key(keypath[1:]):
return False
return True
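Aside: should_render_keypath slices the top-level section key off before calling _is_norender_key, so the first element the helper sees is usually the list index of the yaml entry. A minimal standalone copy of that check, with hypothetical keypaths:
# Illustrative sketch only; mirrors the _is_norender_key logic above.
def is_norender_key(keypath):
    if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
        return True
    if (len(keypath) >= 4 and keypath[1] == 'columns' and
            keypath[3] in ('tests', 'description')):
        return True
    return False

print(is_norender_key((0, 'description')))             # True  -> left unrendered
print(is_norender_key((0, 'columns', 0, 'tests', 0)))  # True  -> left unrendered
print(is_norender_key((0, 'name')))                    # False -> rendered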
class PackageRenderer(BaseRenderer):
@property
def name(self):

@@ -16,12 +16,11 @@ from dbt import flags
from dbt import tracking
from dbt.adapters.factory import get_relation_class_by_name, get_include_paths
from dbt.helper_types import FQNPath, PathSet
from dbt.context.base import generate_base_context
from dbt.context.target import generate_target_context
from dbt.contracts.connection import AdapterRequiredConfig, Credentials
from dbt.contracts.graph.manifest import ManifestMetadata
from dbt.contracts.relation import ComponentName
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.types import ProfileLoadError, ProfileNotFound
from dbt.events.functions import fire_event
from dbt.ui import warning_tag
from dbt.contracts.project import Configuration, UserConfig
@@ -60,6 +59,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def __post_init__(self):
self.validate()
# Called by 'new_project' and 'from_args'
@classmethod
def from_parts(
cls,
@@ -116,6 +116,8 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name=profile.profile_name,
target_name=profile.target_name,
user_config=profile.user_config,
@@ -126,6 +128,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
dependencies=dependencies,
)
# Called by 'load_projects' in this class
def new_project(self, project_root: str) -> 'RuntimeConfig':
"""Given a new project root, read in its project dictionary, supply the
existing project's profile info, and create a new project file.
@@ -140,7 +143,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
profile.validate()
# load the new project and its packages. Don't pass cli variables.
renderer = DbtProjectYamlRenderer(generate_target_context(profile, {}))
renderer = DbtProjectYamlRenderer(profile)
project = Project.from_project_root(
project_root,
@@ -148,14 +151,14 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
verify_version=bool(flags.VERSION_CHECK),
)
cfg = self.from_parts(
runtime_config = self.from_parts(
project=project,
profile=profile,
args=deepcopy(self.args),
)
# force our quoting back onto the new project.
cfg.quoting = deepcopy(self.quoting)
return cfg
runtime_config.quoting = deepcopy(self.quoting)
return runtime_config
def serialize(self) -> Dict[str, Any]:
"""Serialize the full configuration to a single dictionary. For any
@@ -205,21 +208,26 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
)
# build the profile using the base renderer and the one fact we know
# Note: only the named profile section is rendered. The rest of the
# profile is ignored.
cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}'))
profile_renderer = ProfileRenderer(generate_base_context(cli_vars))
profile_renderer = ProfileRenderer(cli_vars)
profile_name = partial.render_profile_name(profile_renderer)
profile = cls._get_rendered_profile(
args, profile_renderer, profile_name
)
# Save env_vars encountered in rendering for partial parsing
profile.profile_env_vars = profile_renderer.ctx_obj.env_vars
# get a new renderer using our target information and render the
# project
ctx = generate_target_context(profile, cli_vars)
project_renderer = DbtProjectYamlRenderer(ctx)
project_renderer = DbtProjectYamlRenderer(profile, cli_vars)
project = partial.render(project_renderer)
# Save env_vars encountered in rendering for partial parsing
project.project_env_vars = project_renderer.ctx_obj.env_vars
return (project, profile)
# Called in main.py, lib.py, task/base.py
@classmethod
def from_args(cls, args: Any) -> 'RuntimeConfig':
"""Given arguments, read in dbt_project.yml from the current directory,
@@ -253,7 +261,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
) -> PathSet:
for key, value in config.items():
if isinstance(value, dict) and not key.startswith('+'):
self._get_v2_config_paths(value, path + (key,), paths)
self._get_config_paths(value, path + (key,), paths)
else:
paths.add(path)
return frozenset(paths)
@@ -360,6 +368,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def clear_dependencies(self):
self.dependencies = None
# Called by 'load_dependencies' in this class
def load_projects(
self, paths: Iterable[Path]
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
@@ -512,6 +521,8 @@ class UnsetProfileConfig(RuntimeConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name='',
target_name='',
user_config=UnsetConfig(),
@@ -534,13 +545,8 @@ class UnsetProfileConfig(RuntimeConfig):
args, profile_renderer, profile_name
)
except (DbtProjectError, DbtProfileError) as exc:
logger.debug(
'Profile not loaded due to error: {}', exc, exc_info=True
)
logger.info(
'No profile "{}" found, continuing with no target',
profile_name
)
fire_event(ProfileLoadError(exc=exc))
fire_event(ProfileNotFound(profile_name=profile_name))
# return the poisoned form
profile = UnsetProfile()
# disable anonymous usage statistics

@@ -1,8 +1,9 @@
from typing import Dict, Any
from dbt.clients import yaml_helper
from dbt.events.functions import fire_event
from dbt.exceptions import raise_compiler_error, ValidationException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.types import InvalidVarsYAML
def parse_cli_vars(var_string: str) -> Dict[str, Any]:
@@ -17,7 +18,5 @@ def parse_cli_vars(var_string: str) -> Dict[str, Any]:
"The --vars argument must be a YAML dictionary, but was "
"of type '{}'".format(type_name))
except ValidationException:
logger.error(
"The YAML provided in the --vars argument is not valid.\n"
)
fire_event(InvalidVarsYAML())
raise

@@ -6,13 +6,14 @@ from typing import (
from dbt import flags
from dbt import tracking
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.jinja import get_rendered
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.exceptions import raise_compiler_error, MacroReturn, raise_parsing_error
from dbt.events.functions import fire_event
from dbt.events.types import MacroEventInfo, MacroEventDebug
from dbt.version import __version__ as dbt_version
# These modules are added to the context. Consider alternative
@@ -21,6 +22,38 @@ import pytz
import datetime
import re
# Contexts in dbt Core
# Contexts are used for Jinja rendering. They include context methods,
# executable macros, and various settings that are available in Jinja.
#
# Different contexts are used in different places because we allow access
# to different methods and data in different places. Executable SQL, for
# example, includes the available macros and the model, while Jinja in
# yaml files is more limited.
#
# The context that is passed to Jinja is always in a dictionary format,
# not an actual class, so a 'to_dict()' is executed on a context class
# before it is used for rendering.
#
# Each context has a generate_<name>_context function to create the context.
# ProviderContext subclasses have different generate functions for
# parsing and for execution.
#
# Context class hierarchy
#
# BaseContext -- core/dbt/context/base.py
# TargetContext -- core/dbt/context/target.py
# ConfiguredContext -- core/dbt/context/configured.py
# SchemaYamlContext -- core/dbt/context/configured.py
# DocsRuntimeContext -- core/dbt/context/configured.py
# MacroResolvingContext -- core/dbt/context/configured.py
# ManifestContext -- core/dbt/context/manifest.py
# QueryHeaderContext -- core/dbt/context/manifest.py
# ProviderContext -- core/dbt/context/provider.py
# MacroContext -- core/dbt/context/provider.py
# ModelContext -- core/dbt/context/provider.py
# TestContext -- core/dbt/context/provider.py
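Aside: a minimal sketch of the "context is flattened to a dict before Jinja sees it" idea described above, using jinja2 directly rather than dbt's real context classes; the class and variable names are invented.
# Illustrative sketch only; not dbt's BaseContext.
from jinja2 import Template

class TinyContext:
    def __init__(self, cli_vars):
        self.cli_vars = cli_vars

    def var(self, name, default=None):
        return self.cli_vars.get(name, default)

    def to_dict(self):
        # everything Jinja can call or read must end up as a plain dict entry
        return {"var": self.var}

ctx = TinyContext({"schema_suffix": "prod"}).to_dict()
print(Template("select 1 as id -- {{ var('schema_suffix') }}").render(**ctx))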
def get_pytz_module_context() -> Dict[str, Any]:
context_exports = pytz.__all__ # type: ignore
@@ -160,9 +193,11 @@ class Var:
class BaseContext(metaclass=ContextMeta):
# subclass is TargetContext
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
self.env_vars = {}
def generate_builtins(self):
builtins: Dict[str, Any] = {}
@@ -271,20 +306,24 @@ class BaseContext(metaclass=ContextMeta):
return Var(self._ctx, self.cli_vars)
@contextmember
@staticmethod
def env_var(var: str, default: Optional[str] = None) -> str:
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var in os.environ:
return os.environ[var]
return_value = os.environ[var]
elif default is not None:
return default
return_value = default
if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
undefined_error(msg)
raise_parsing_error(msg)
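Aside: a minimal sketch of the env_var() contract above, including the new bookkeeping that remembers which variables (and defaults) were used so partial parsing can detect changes later; the class name is invented.
# Illustrative sketch only; not dbt's API.
import os

class EnvVarRecorder:
    def __init__(self):
        self.env_vars = {}

    def env_var(self, var, default=None):
        value = os.environ.get(var, default)
        if value is None:
            raise RuntimeError(f"Env var required but not provided: '{var}'")
        # defaults are recorded too, so a later change to the real env var
        # can invalidate whatever was rendered with this value
        self.env_vars[var] = value
        return value

os.environ["DEMO_SCHEMA"] = "analytics"
recorder = EnvVarRecorder()
print(recorder.env_var("DEMO_SCHEMA"))        # analytics
print(recorder.env_var("DEMO_THREADS", "4"))  # 4 (default used, still recorded)
print(recorder.env_vars)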
if os.environ.get('DBT_MACRO_DEBUGGING'):
@contextmember
@@ -443,9 +482,9 @@ class BaseContext(metaclass=ContextMeta):
{% endmacro %}"
"""
if info:
logger.info(msg)
fire_event(MacroEventInfo(msg))
else:
logger.debug(msg)
fire_event(MacroEventDebug(msg))
return ''
@contextproperty

@@ -1,14 +1,18 @@
from typing import Any, Dict
import os
from typing import Any, Dict, Optional
from dbt.contracts.connection import AdapterRequiredConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.node_types import NodeType
from dbt.utils import MultiDict
from dbt.context.base import contextproperty, Var
from dbt.context.base import contextproperty, contextmember, Var
from dbt.context.target import TargetContext
from dbt.exceptions import raise_parsing_error
class ConfiguredContext(TargetContext):
# subclasses are SchemaYamlContext, MacroResolvingContext, ManifestContext
config: AdapterRequiredConfig
def __init__(
@@ -63,10 +67,18 @@ class ConfiguredVar(Var):
return self.get_missing_var(var_name)
class SchemaYamlVars():
def __init__(self):
self.env_vars = {}
self.vars = {}
class SchemaYamlContext(ConfiguredContext):
def __init__(self, config, project_name: str):
# subclass is DocsRuntimeContext
def __init__(self, config, project_name: str, schema_yaml_vars: Optional[SchemaYamlVars]):
super().__init__(config)
self._project_name = project_name
self.schema_yaml_vars = schema_yaml_vars
@contextproperty
def var(self) -> ConfiguredVar:
@@ -74,6 +86,22 @@ class SchemaYamlContext(ConfiguredContext):
self._ctx, self.config, self._project_name
)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
if not var.startswith(SECRET_ENV_PREFIX) and self.schema_yaml_vars:
self.schema_yaml_vars.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
@@ -86,10 +114,10 @@ class MacroResolvingContext(ConfiguredContext):
)
def generate_schema_yml(
config: AdapterRequiredConfig, project_name: str
def generate_schema_yml_context(
config: AdapterRequiredConfig, project_name: str, schema_yaml_vars: SchemaYamlVars = None
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name)
ctx = SchemaYamlContext(config, project_name, schema_yaml_vars)
return ctx.to_dict()

@@ -23,7 +23,7 @@ class DocsRuntimeContext(SchemaYamlContext):
manifest: Manifest,
current_project: str,
) -> None:
super().__init__(config, current_project)
super().__init__(config, current_project, None)
self.node = node
self.manifest = manifest
@@ -75,7 +75,7 @@ class DocsRuntimeContext(SchemaYamlContext):
return target_doc.block_contents
def generate_runtime_docs(
def generate_runtime_docs_context(
config: RuntimeConfig,
target: Any,
manifest: Manifest,

@@ -17,6 +17,7 @@ class ManifestContext(ConfiguredContext):
The given macros can override any previous context values, which will be
available as if they were accessed relative to the package name.
"""
# subclasses are QueryHeaderContext and ProviderContext
def __init__(
self,
config: AdapterRequiredConfig,

@@ -16,6 +16,7 @@ from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
@@ -31,6 +32,7 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.parsed import (
ParsedMacro,
ParsedExposure,
ParsedMetric,
ParsedSeedNode,
ParsedSourceDefinition,
)
@@ -47,9 +49,9 @@ from dbt.exceptions import (
ref_bad_context,
source_target_not_found,
wrapped_exports,
raise_parsing_error,
)
from dbt.config import IsFQNResource
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.node_types import NodeType
from dbt.utils import (
@@ -636,6 +638,7 @@ T = TypeVar('T')
# Base context collection, used for parsing configs.
class ProviderContext(ManifestContext):
# subclasses are MacroContext, ModelContext, TestContext
def __init__(
self,
model,
@@ -1161,6 +1164,34 @@ class ProviderContext(ManifestContext):
)
raise CompilationException(msg)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file.
# If this is compiling, do not save because it's irrelevant to parsing.
if (not var.startswith(SECRET_ENV_PREFIX) and self.model and
not hasattr(self.model, 'compiled')):
self.manifest.env_vars[var] = return_value
source_file = self.manifest.files[self.model.file_id]
# Schema files should never get here
if source_file.parse_file_type != 'schema':
source_file.env_vars.append(var)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
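Aside: a minimal sketch of the SECRET_ENV_PREFIX guard used above: secret-prefixed variables are still returned to the caller but never recorded; the prefix value shown is an assumption for illustration.
# Illustrative sketch only; prefix value is assumed, not quoted from dbt.
import os

SECRET_ENV_PREFIX = "DBT_ENV_SECRET_"
recorded = {}

def track(var, value):
    if not var.startswith(SECRET_ENV_PREFIX):
        recorded[var] = value      # safe to persist for partial parsing
    return value                   # the caller still gets the real value

os.environ["DBT_ENV_SECRET_TOKEN"] = "s3cr3t"
os.environ["TARGET_SCHEMA"] = "analytics"
track("DBT_ENV_SECRET_TOKEN", os.environ["DBT_ENV_SECRET_TOKEN"])
track("TARGET_SCHEMA", os.environ["TARGET_SCHEMA"])
print(recorded)                    # only TARGET_SCHEMA is remembered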
class MacroContext(ProviderContext):
"""Internally, macros can be executed like nodes, with some restrictions:
@@ -1262,7 +1293,7 @@ class ModelContext(ProviderContext):
# This is called by '_context_for', used in 'render_with_context'
def generate_parser_model(
def generate_parser_model_context(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1279,7 +1310,7 @@ def generate_parser_model(
return ctx.to_dict()
def generate_generate_component_name_macro(
def generate_generate_name_macro_context(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1290,7 +1321,7 @@ def generate_generate_component_name_macro(
return ctx.to_dict()
def generate_runtime_model(
def generate_runtime_model_context(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1301,7 +1332,7 @@ def generate_runtime_model(
return ctx.to_dict()
def generate_runtime_macro(
def generate_runtime_macro_context(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1355,6 +1386,31 @@ def generate_parse_exposure(
}
class MetricRefResolver(BaseResolver):
def __call__(self, *args) -> str:
if len(args) not in (1, 2):
ref_invalid_args(self.model, args)
self.model.refs.append(list(args))
return ''
def generate_parse_metrics(
metric: ParsedMetric,
config: RuntimeConfig,
manifest: Manifest,
package_name: str,
) -> Dict[str, Any]:
project = config.load_dependencies()[package_name]
return {
'ref': MetricRefResolver(
None,
metric,
project,
manifest,
),
}
# This class is currently used by the schema parser in order
# to limit the number of macros in the context by using
# the TestMacroNamespace
@@ -1389,8 +1445,13 @@ class TestContext(ProviderContext):
# 'depends_on.macros' by using the TestMacroNamespace
def _build_test_namespace(self):
depends_on_macros = []
# all generic tests use a macro named 'get_where_subquery' to wrap 'model' arg
# see generic_test_builders.build_model_str
get_where_subquery = self.macro_resolver.macros_by_name.get('get_where_subquery')
if get_where_subquery:
depends_on_macros.append(get_where_subquery.unique_id)
if self.model.depends_on and self.model.depends_on.macros:
depends_on_macros = self.model.depends_on.macros
depends_on_macros.extend(self.model.depends_on.macros)
lookup_macros = depends_on_macros.copy()
for macro_unique_id in lookup_macros:
lookup_macro = self.macro_resolver.macros.get(macro_unique_id)
@@ -1403,6 +1464,28 @@ class TestContext(ProviderContext):
)
self.namespace = macro_namespace
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if not var.startswith(SECRET_ENV_PREFIX) and self.model:
self.manifest.env_vars[var] = return_value
# the "model" should only be test nodes, but just in case, check
if self.model.resource_type == NodeType.Test and self.model.file_key_name:
source_file = self.manifest.files[self.model.file_id]
(yaml_key, name) = self.model.file_key_name.split('.')
source_file.add_env_var(var, yaml_key, name)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
def generate_test_context(
model: ManifestNode,

@@ -8,6 +8,7 @@ from dbt.context.base import (
class TargetContext(BaseContext):
# subclass is ConfiguredContext
def __init__(self, config: HasCredentials, cli_vars: Dict[str, Any]):
super().__init__(cli_vars=cli_vars)
self.config = config

@@ -7,7 +7,8 @@ from typing import (
)
from dbt.exceptions import InternalException
from dbt.utils import translate_aliases
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import NewConnectionOpening
from typing_extensions import Protocol
from dbt.dataclass_schema import (
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
@@ -101,10 +102,7 @@ class LazyHandle:
self.opener = opener
def resolve(self, connection: Connection) -> Connection:
logger.debug(
'Opening a new connection, currently in state {}'
.format(connection.state)
)
fire_event(NewConnectionOpening(connection_state=connection.state))
return self.opener(connection)

@@ -190,6 +190,7 @@ class SourceFile(BaseSourceFile):
nodes: List[str] = field(default_factory=list)
docs: List[str] = field(default_factory=list)
macros: List[str] = field(default_factory=list)
env_vars: List[str] = field(default_factory=list)
@classmethod
def big_seed(cls, path: FilePath) -> 'SourceFile':
@@ -222,6 +223,7 @@ class SchemaSourceFile(BaseSourceFile):
tests: Dict[str, Any] = field(default_factory=dict)
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.
@@ -230,6 +232,7 @@ class SchemaSourceFile(BaseSourceFile):
# Patches are only against external sources. Sources can be
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None
@@ -252,7 +255,7 @@ class SchemaSourceFile(BaseSourceFile):
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
# Remove partial parsing specific data
for key in ('pp_files', 'pp_test_index', 'pp_dict'):
for key in ('pp_test_index', 'pp_dict'):
if key in dct:
del dct[key]
return dct
@@ -299,5 +302,21 @@ class SchemaSourceFile(BaseSourceFile):
test_ids.extend(self.tests[key][name])
return test_ids
def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
if name not in self.env_vars[yaml_key]:
self.env_vars[yaml_key][name] = []
if var not in self.env_vars[yaml_key][name]:
self.env_vars[yaml_key][name].append(var)
def delete_from_env_vars(self, yaml_key, name):
# We delete all vars for this yaml_key/name because the
# entry has been scheduled for reparsing.
if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
del self.env_vars[yaml_key][name]
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]
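Aside: the two methods above maintain a nested dict of {yaml_key: {entry name: [env var names]}}; a minimal standalone version with made-up values:
# Illustrative sketch only; mirrors add_env_var / delete_from_env_vars above.
env_vars = {}

def add_env_var(var, yaml_key, name):
    entry = env_vars.setdefault(yaml_key, {}).setdefault(name, [])
    if var not in entry:
        entry.append(var)

def delete_from_env_vars(yaml_key, name):
    # the entry is about to be reparsed, so drop everything recorded for it
    if yaml_key in env_vars and name in env_vars[yaml_key]:
        del env_vars[yaml_key][name]
        if not env_vars[yaml_key]:
            del env_vars[yaml_key]

add_env_var("DATABASE", "models", "customers")
add_env_var("SCHEMA", "models", "customers")
print(env_vars)   # {'models': {'customers': ['DATABASE', 'SCHEMA']}}
delete_from_env_vars("models", "customers")
print(env_vars)   # {}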
AnySourceFile = Union[SchemaSourceFile, SourceFile]

@@ -6,8 +6,10 @@ from dbt.contracts.graph.parsed import (
ParsedHookNode,
ParsedModelNode,
ParsedExposure,
ParsedMetric,
ParsedResource,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -81,11 +83,17 @@ class CompiledModelNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class CompiledRPCNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class CompiledSqlNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
@dataclass
class CompiledSeedNode(CompiledNode):
# keep this in sync with ParsedSeedNode!
@@ -119,6 +127,7 @@ class CompiledGenericTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore
@@ -142,6 +151,7 @@ PARSED_TYPES: Dict[Type[CompiledNode], Type[ParsedResource]] = {
CompiledModelNode: ParsedModelNode,
CompiledHookNode: ParsedHookNode,
CompiledRPCNode: ParsedRPCNode,
CompiledSqlNode: ParsedSqlNode,
CompiledSeedNode: ParsedSeedNode,
CompiledSnapshotNode: ParsedSnapshotNode,
CompiledSingularTestNode: ParsedSingularTestNode,
@@ -154,6 +164,7 @@ COMPILED_TYPES: Dict[Type[ParsedResource], Type[CompiledNode]] = {
ParsedModelNode: CompiledModelNode,
ParsedHookNode: CompiledHookNode,
ParsedRPCNode: CompiledRPCNode,
ParsedSqlNode: CompiledSqlNode,
ParsedSeedNode: CompiledSeedNode,
ParsedSnapshotNode: CompiledSnapshotNode,
ParsedSingularTestNode: CompiledSingularTestNode,
@@ -189,6 +200,7 @@ NonSourceCompiledNode = Union[
CompiledModelNode,
CompiledHookNode,
CompiledRPCNode,
CompiledSqlNode,
CompiledGenericTestNode,
CompiledSeedNode,
CompiledSnapshotNode,
@@ -200,6 +212,7 @@ NonSourceParsedNode = Union[
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -220,8 +233,10 @@ CompileResultNode = Union[
ParsedSourceDefinition,
]
# anything that participates in the graph: sources, exposures, manifest nodes
# anything that participates in the graph: sources, exposures, metrics,
# or manifest nodes
GraphMemberNode = Union[
CompileResultNode,
ParsedExposure,
ParsedMetric,
]

@@ -15,8 +15,8 @@ from dbt.contracts.graph.compiled import (
)
from dbt.contracts.graph.parsed import (
ParsedMacro, ParsedDocumentation,
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
UnpatchedSourceDefinition, ManifestNodes
ParsedSourceDefinition, ParsedExposure, ParsedMetric,
HasUniqueID, UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
@@ -29,7 +29,8 @@ from dbt.exceptions import (
raise_duplicate_resource_name, raise_compiler_error,
)
from dbt.helper_types import PathSet
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import MergedFromState
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import flags
@@ -546,6 +547,8 @@ class ParsingInfo:
@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash = field(default_factory=FileHash.empty)
project_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_hash: FileHash = field(default_factory=FileHash.empty)
project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)
@@ -562,6 +565,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros: MutableMapping[str, ParsedMacro] = field(default_factory=dict)
docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
metrics: MutableMapping[str, ParsedMetric] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
@@ -569,6 +573,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
state_check: ManifestStateCheck = field(default_factory=ManifestStateCheck)
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
env_vars: MutableMapping[str, str] = field(default_factory=dict)
_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
@@ -628,6 +633,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def update_exposure(self, new_exposure: ParsedExposure):
_update_into(self.exposures, new_exposure)
def update_metric(self, new_metric: ParsedMetric):
_update_into(self.metrics, new_metric)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
@@ -645,6 +653,10 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
k: v.to_dict(omit_none=False)
for k, v in self.exposures.items()
},
'metrics': {
k: v.to_dict(omit_none=False)
for k, v in self.metrics.items()
},
'nodes': {
k: v.to_dict(omit_none=False)
for k, v in self.nodes.items()
@@ -697,7 +709,12 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def get_resource_fqns(self) -> Mapping[str, PathSet]:
resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
all_resources = chain(
self.exposures.values(),
self.nodes.values(),
self.sources.values(),
self.metrics.values()
)
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
if resource_type_plural not in resource_fqns:
@@ -725,6 +742,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros={k: _deepcopy(v) for k, v in self.macros.items()},
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
disabled={k: _deepcopy(v) for k, v in self.disabled.items()},
@@ -737,6 +755,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.nodes.values(),
self.sources.values(),
self.exposures.values(),
self.metrics.values(),
))
forward_edges, backward_edges = build_node_edges(edge_members)
self.child_map = forward_edges
@@ -758,6 +777,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros=self.macros,
docs=self.docs,
exposures=self.exposures,
metrics=self.metrics,
selectors=self.selectors,
metadata=self.metadata,
disabled=self.disabled,
@@ -777,6 +797,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return self.sources[unique_id]
elif unique_id in self.exposures:
return self.exposures[unique_id]
elif unique_id in self.metrics:
return self.metrics[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.InternalException(
@@ -937,9 +959,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
# log up to 5 items
sample = list(islice(merged, 5))
logger.debug(
f'Merged {len(merged)} items from state (sample: {sample})'
)
fire_event(MergedFromState(nbr_merged=len(merged), sample=sample))
# Methods that were formerly in ParseResult
@@ -1005,6 +1025,11 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)
def add_metric(self, source_file: SchemaSourceFile, metric: ParsedMetric):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
source_file.metrics.append(metric.unique_id)
def add_disabled_nofile(self, node: CompileResultNode):
# There can be multiple disabled nodes for the same unique_id
if node.unique_id in self.disabled:
@@ -1041,6 +1066,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.macros,
self.docs,
self.exposures,
self.metrics,
self.selectors,
self.files,
self.metadata,
@@ -1048,6 +1074,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.state_check,
self.source_patches,
self.disabled,
self.env_vars,
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
@@ -1070,7 +1097,7 @@ AnyManifest = Union[Manifest, MacroManifest]
@dataclass
@schema_version('manifest', 3)
@schema_version('manifest', 4)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=(
@@ -1097,6 +1124,11 @@ class WritableManifest(ArtifactMixin):
'The exposures defined in the dbt project and its dependencies'
))
)
metrics: Mapping[UniqueID, ParsedMetric] = field(
metadata=dict(description=(
'The metrics defined in the dbt project and its dependencies'
))
)
selectors: Mapping[UniqueID, Any] = field(
metadata=dict(description=(
'The selectors defined in selectors.yml'

@@ -26,11 +26,10 @@ from dbt.contracts.graph.unparsed import (
UnparsedBaseNode, FreshnessThreshold, ExternalTable,
HasYamlMetadata, MacroArgument, UnparsedSourceDefinition,
UnparsedSourceTableDefinition, UnparsedColumn, TestDef,
ExposureOwner, ExposureType, MaturityType
ExposureOwner, ExposureType, MaturityType, MetricFilter
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.exceptions import warn_or_error
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt import flags
from dbt.node_types import NodeType
@@ -151,7 +150,7 @@ class ParsedNodeMixins(dbtClassMixin):
# Note: config should already be updated
self.patch_path: Optional[str] = patch.file_id
# update created_at so process_docs will run in partial parsing
self.created_at = int(time.time())
self.created_at = time.time()
self.description = patch.description
self.columns = patch.columns
self.meta = patch.meta
@@ -193,7 +192,7 @@ class ParsedNodeDefaults(ParsedNodeMandatory):
build_path: Optional[str] = None
deferred: bool = False
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: int = field(default_factory=lambda: int(time.time()))
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
def write_node(self, target_path: str, subdirectory: str, payload: str):
@@ -240,6 +239,8 @@ class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):
return ParsedSeedNode.from_dict(dct)
elif resource_type == 'rpc':
return ParsedRPCNode.from_dict(dct)
elif resource_type == 'sql':
return ParsedSqlNode.from_dict(dct)
elif resource_type == 'test':
if 'test_metadata' in dct:
return ParsedGenericTestNode.from_dict(dct)
@@ -341,11 +342,17 @@ class ParsedModelNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class ParsedRPCNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class ParsedSqlNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
def same_seeds(first: ParsedNode, second: ParsedNode) -> bool:
# for seeds, we check the hashes. If the hashes are different types,
# no match. If the hashes are both the same 'path', log a warning and
@@ -402,6 +409,9 @@ class ParsedSeedNode(ParsedNode):
@dataclass
class TestMetadata(dbtClassMixin, Replaceable):
name: str
# kwargs are the args that are left in the test builder after
# removing configs. They are set from the test builder when
# the test node is created.
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None
@@ -424,6 +434,7 @@ class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
# keep this in sync with CompiledGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
@@ -493,12 +504,12 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
docs: Docs = field(default_factory=Docs)
patch_path: Optional[str] = None
arguments: List[MacroArgument] = field(default_factory=list)
created_at: int = field(default_factory=lambda: int(time.time()))
created_at: float = field(default_factory=lambda: time.time())
def patch(self, patch: ParsedMacroPatch):
self.patch_path: Optional[str] = patch.file_id
self.description = patch.description
self.created_at = int(time.time())
self.created_at = time.time()
self.meta = patch.meta
self.docs = patch.docs
self.arguments = patch.arguments
@@ -614,7 +625,7 @@ class ParsedSourceDefinition(
patch_path: Optional[Path] = None
unrendered_config: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
created_at: int = field(default_factory=lambda: int(time.time()))
created_at: float = field(default_factory=lambda: time.time())
def same_database_representation(
self, other: 'ParsedSourceDefinition'
@@ -725,7 +736,7 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
created_at: int = field(default_factory=lambda: int(time.time()))
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
@@ -771,12 +782,88 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
)
@dataclass
class ParsedMetric(UnparsedBaseNode, HasUniqueID, HasFqn):
model: str
name: str
description: str
label: str
type: str
sql: Optional[str]
timestamp: Optional[str]
filters: List[MetricFilter]
time_grains: List[str]
dimensions: List[str]
resource_type: NodeType = NodeType.Metric
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
created_at: int = field(default_factory=lambda: int(time.time()))
@property
def depends_on_nodes(self):
return self.depends_on.nodes
@property
def search_name(self):
return self.name
def same_model(self, old: 'ParsedMetric') -> bool:
return self.model == old.model
def same_dimensions(self, old: 'ParsedMetric') -> bool:
return self.dimensions == old.dimensions
def same_filters(self, old: 'ParsedMetric') -> bool:
return self.filters == old.filters
def same_description(self, old: 'ParsedMetric') -> bool:
return self.description == old.description
def same_label(self, old: 'ParsedMetric') -> bool:
return self.label == old.label
def same_type(self, old: 'ParsedMetric') -> bool:
return self.type == old.type
def same_sql(self, old: 'ParsedMetric') -> bool:
return self.sql == old.sql
def same_timestamp(self, old: 'ParsedMetric') -> bool:
return self.timestamp == old.timestamp
def same_time_grains(self, old: 'ParsedMetric') -> bool:
return self.time_grains == old.time_grains
def same_contents(self, old: Optional['ParsedMetric']) -> bool:
# existing when it didn't before is a change!
# metadata/tags changes are not "changes"
if old is None:
return True
return (
self.same_model(old) and
self.same_dimensions(old) and
self.same_filters(old) and
self.same_description(old) and
self.same_label(old) and
self.same_type(old) and
self.same_sql(old) and
self.same_timestamp(old) and
self.same_time_grains(old) and
True
)
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedSingularTestNode,
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -788,5 +875,6 @@ ParsedResource = Union[
ParsedMacro,
ParsedNode,
ParsedExposure,
ParsedMetric,
ParsedSourceDefinition,
]

@@ -60,6 +60,7 @@ class UnparsedNode(UnparsedBaseNode, HasSQL):
NodeType.Operation,
NodeType.Seed,
NodeType.RPCCall,
NodeType.SqlOperation,
]})
@property
@@ -167,20 +168,25 @@ class TimePeriod(StrEnum):
@dataclass
class Time(dbtClassMixin, Replaceable):
count: int
period: TimePeriod
class Time(dbtClassMixin, Mergeable):
count: Optional[int] = None
period: Optional[TimePeriod] = None
def exceeded(self, actual_age: float) -> bool:
kwargs = {self.period.plural(): self.count}
if self.period is None or self.count is None:
return False
kwargs: Dict[str, int] = {self.period.plural(): self.count}
difference = timedelta(**kwargs).total_seconds()
return actual_age > difference
def __bool__(self):
return self.count is not None and self.period is not None
@dataclass
class FreshnessThreshold(dbtClassMixin, Mergeable):
warn_after: Optional[Time] = None
error_after: Optional[Time] = None
warn_after: Optional[Time] = field(default_factory=Time)
error_after: Optional[Time] = field(default_factory=Time)
filter: Optional[str] = None
def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus":
@@ -193,7 +199,7 @@ class FreshnessThreshold(dbtClassMixin, Mergeable):
return FreshnessStatus.Pass
def __bool__(self):
return self.warn_after is not None or self.error_after is not None
return bool(self.warn_after) or bool(self.error_after)
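Aside: a minimal worked example of the exceeded() arithmetic above: count plus period become seconds via timedelta and are compared with the observed age.
# Illustrative sketch only; period is the plural keyword timedelta accepts.
from datetime import timedelta

def exceeded(count, period, actual_age_seconds):
    if count is None or period is None:
        return False
    threshold = timedelta(**{period: count}).total_seconds()
    return actual_age_seconds > threshold

print(exceeded(12, "hours", 10 * 3600))   # False: 10h old, threshold 12h
print(exceeded(12, "hours", 15 * 3600))   # True:  15h old, threshold 12h
print(exceeded(None, None, 15 * 3600))    # False: unset threshold never fires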
@dataclass
@@ -440,3 +446,27 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
tags: List[str] = field(default_factory=list)
url: Optional[str] = None
depends_on: List[str] = field(default_factory=list)
@dataclass
class MetricFilter(dbtClassMixin, Replaceable):
field: str
operator: str
# TODO : Can we make this Any?
value: str
@dataclass
class UnparsedMetric(dbtClassMixin, Replaceable):
model: str
name: str
label: str
type: str
description: str = ''
sql: Optional[str] = None
timestamp: Optional[str] = None
time_grains: List[str] = field(default_factory=list)
dimensions: List[str] = field(default_factory=list)
filters: List[MetricFilter] = field(default_factory=list)
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)

@@ -1,7 +1,6 @@
from dbt.contracts.util import Replaceable, Mergeable, list_str
from dbt.contracts.connection import QueryComment, UserConfigContract
from dbt.helper_types import NoValue
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError,
HyphenatedDbtClassMixin,

@@ -11,10 +11,11 @@ from dbt.contracts.util import (
schema_version,
)
from dbt.exceptions import InternalException
from dbt.events.functions import fire_event
from dbt.events.types import TimingInfoCollected
from dbt.logger import (
TimingProcessor,
JsonOnly,
GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from dbt.dataclass_schema import dbtClassMixin, StrEnum
@@ -54,7 +55,7 @@ class collect_timing_info:
def __exit__(self, exc_type, exc_value, traceback):
self.timing_info.end()
with JsonOnly(), TimingProcessor(self.timing_info):
logger.debug('finished collecting timing info')
fire_event(TimingInfoCollected())
class NodeStatus(StrEnum):
@@ -185,7 +186,7 @@ class RunExecutionResult(
@dataclass
@schema_version('run-results', 3)
@schema_version('run-results', 4)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@@ -369,7 +370,7 @@ class FreshnessResult(ExecutionResult):
@dataclass
@schema_version('sources', 2)
@schema_version('sources', 3)
class FreshnessExecutionResultArtifact(
ArtifactMixin,
VersionedSchema,

@@ -1,819 +0,0 @@
import enum
import os
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional, Union, List, Any, Dict, Type, Sequence
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
CatalogArtifact,
CatalogResults,
ExecutionResult,
FreshnessExecutionResultArtifact,
FreshnessResult,
RunOperationResult,
RunOperationResultsArtifact,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.exceptions import InternalException
from dbt.logger import LogMessage
from dbt.utils import restrict_to
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Inputs
@dataclass
class RPCParameters(dbtClassMixin):
task_tags: TaskTags
timeout: Optional[float]
@classmethod
def __pre_deserialize__(cls, data, omit_none=True):
data = super().__pre_deserialize__(data)
if 'timeout' not in data:
data['timeout'] = None
if 'task_tags' not in data:
data['task_tags'] = None
return data
@dataclass
class RPCExecParameters(RPCParameters):
name: str
sql: str
macros: Optional[str] = None
@dataclass
class RPCCompileParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCListParameters(RPCParameters):
resource_types: Optional[List[str]] = None
models: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
selector: Optional[str] = None
output: Optional[str] = 'json'
output_keys: Optional[List[str]] = None
@dataclass
class RPCRunParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSnapshotParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCTestParameters(RPCCompileParameters):
data: bool = False
schema: bool = False
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSeedParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
show: bool = False
state: Optional[str] = None
@dataclass
class RPCDocsGenerateParameters(RPCParameters):
compile: bool = True
state: Optional[str] = None
@dataclass
class RPCBuildParameters(RPCParameters):
resource_types: Optional[List[str]] = None
select: Union[None, str, List[str]] = None
threads: Optional[int] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCCliParameters(RPCParameters):
cli: str
@dataclass
class RPCDepsParameters(RPCParameters):
pass
@dataclass
class KillParameters(RPCParameters):
task_id: TaskID
@dataclass
class PollParameters(RPCParameters):
request_token: TaskID
logs: bool = True
logs_start: int = 0
@dataclass
class PSParameters(RPCParameters):
active: bool = True
completed: bool = False
@dataclass
class StatusParameters(RPCParameters):
pass
@dataclass
class GCSettings(dbtClassMixin):
# start evicting the longest-ago-ended tasks here
maxsize: int
# start evicting all tasks before now - auto_reap_age when we have this
# many tasks in the table
reapsize: int
# a positive timedelta indicating how far back we should go
auto_reap_age: timedelta
@dataclass
class GCParameters(RPCParameters):
"""The gc endpoint takes three arguments, any of which may be present:
- task_ids: An optional list of task ID UUIDs to try to GC
- before: If provided, should be a datetime string. All tasks that finished
before that datetime will be GCed
- settings: If provided, should be a GCSettings object in JSON form. It
will be applied to the task manager before GC starts. By default the
existing gc settings remain.
"""
task_ids: Optional[List[TaskID]] = None
before: Optional[datetime] = None
settings: Optional[GCSettings] = None
@dataclass
class RPCRunOperationParameters(RPCParameters):
macro: str
args: Dict[str, Any] = field(default_factory=dict)
@dataclass
class RPCSourceFreshnessParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
@dataclass
class GetManifestParameters(RPCParameters):
pass
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
@schema_version('remote-list-results', 1)
class RemoteListResults(RemoteResult):
output: List[Any]
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-deps-result', 1)
class RemoteDepsResult(RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-catalog-result', 1)
class RemoteCatalogResults(CatalogResults, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
artifact = CatalogArtifact.from_results(
generated_at=self.generated_at,
nodes=self.nodes,
sources=self.sources,
compile_results=self._compile_results,
errors=self.errors,
)
artifact.write(path)
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-operation-result', 1)
class RemoteRunOperationResult(RunOperationResult, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_local_result(
cls,
base: RunOperationResultsArtifact,
logs: List[LogMessage],
) -> 'RemoteRunOperationResult':
return cls(
generated_at=base.metadata.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
success=base.success,
logs=logs,
)
def write(self, path: str):
writable = RunOperationResultsArtifact.from_success(
success=self.success,
generated_at=self.generated_at,
elapsed_time=self.elapsed_time,
)
writable.write(path)
@dataclass
@schema_version('remote-freshness-result', 1)
class RemoteFreshnessResult(FreshnessResult, RemoteResult):
@classmethod
def from_local_result(
cls,
base: FreshnessResult,
logs: List[LogMessage],
) -> 'RemoteFreshnessResult':
return cls(
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
)
def write(self, path: str):
writable = FreshnessExecutionResultArtifact.from_result(base=self)
writable.write(path)
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)
RPCResult = Union[
RemoteCompileResult,
RemoteExecutionResult,
RemoteFreshnessResult,
RemoteCatalogResults,
RemoteDepsResult,
RemoteRunOperationResult,
]
# GC types
class GCResultState(StrEnum):
Deleted = 'deleted' # successful GC
Missing = 'missing' # nothing to GC
Running = 'running' # can't GC
@dataclass
@schema_version('remote-gc-result', 1)
class GCResult(RemoteResult):
logs: List[LogMessage] = field(default_factory=list)
deleted: List[TaskID] = field(default_factory=list)
missing: List[TaskID] = field(default_factory=list)
running: List[TaskID] = field(default_factory=list)
def add_result(self, task_id: TaskID, state: GCResultState):
if state == GCResultState.Missing:
self.missing.append(task_id)
elif state == GCResultState.Running:
self.running.append(task_id)
elif state == GCResultState.Deleted:
self.deleted.append(task_id)
else:
raise InternalException(
f'Got invalid state in add_result: {state}'
)
# Task management types
class TaskHandlerState(StrEnum):
NotStarted = 'not started'
Initializing = 'initializing'
Running = 'running'
Success = 'success'
Error = 'error'
Killed = 'killed'
Failed = 'failed'
def __lt__(self, other) -> bool:
"""A logical ordering for TaskHandlerState:
NotStarted < Initializing < Running < (Success, Error, Killed, Failed)
"""
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other not in smaller
return False
def __le__(self, other) -> bool:
# so that ((Success <= Error) is True)
return ((self < other) or
(self == other) or
(self.finished and other.finished))
def __gt__(self, other) -> bool:
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other in smaller
return other in smaller
def __ge__(self, other) -> bool:
# so that ((Success >= Error) is True)
return ((self > other) or
(self == other) or
(self.finished and other.finished))
@property
def finished(self) -> bool:
return self in (self.Error, self.Success, self.Killed, self.Failed)
@dataclass
class TaskTiming(dbtClassMixin):
state: TaskHandlerState
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
class TaskRow(TaskTiming):
task_id: TaskID
request_source: str
method: str
request_id: Union[str, int]
tags: TaskTags = None
timeout: Optional[float] = None
@dataclass
@schema_version('remote-ps-result', 1)
class PSResult(RemoteResult):
rows: List[TaskRow]
class KillResultStatus(StrEnum):
Missing = 'missing'
NotStarted = 'not_started'
Killed = 'killed'
Finished = 'finished'
@dataclass
@schema_version('remote-kill-result', 1)
class KillResult(RemoteResult):
state: KillResultStatus = KillResultStatus.Missing
logs: List[LogMessage] = field(default_factory=list)
@dataclass
@schema_version('remote-manifest-result', 1)
class GetManifestResult(RemoteResult):
manifest: Optional[WritableManifest] = None
# this is kind of carefully structured: BlocksManifestTasks is implied by
# RequiresConfigReloadBefore and RequiresManifestReloadAfter
class RemoteMethodFlags(enum.Flag):
Empty = 0
BlocksManifestTasks = 1
RequiresConfigReloadBefore = 3
RequiresManifestReloadAfter = 5
Builtin = 8
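# For example, RemoteMethodFlags.RequiresConfigReloadBefore & RemoteMethodFlags.BlocksManifestTasks
# is truthy: 3 (0b011) and 5 (0b101) both carry the BlocksManifestTasks bit (0b001).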
# Polling types
@dataclass
class PollResult(RemoteResult, TaskTiming):
state: TaskHandlerState
tags: TaskTags
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
@schema_version('poll-remote-deps-result', 1)
class PollRemoteEmptyCompleteResult(PollResult, RemoteResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_result(
cls: Type['PollRemoteEmptyCompleteResult'],
base: RemoteDepsResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRemoteEmptyCompleteResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-killed-result', 1)
class PollKilledResult(PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Killed),
)
@dataclass
@schema_version('poll-remote-execution-result', 1)
class PollExecuteCompleteResult(
RemoteExecutionResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollExecuteCompleteResult'],
base: RemoteExecutionResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollExecuteCompleteResult':
return cls(
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at,
)
@dataclass
@schema_version('poll-remote-compile-result', 1)
class PollCompileCompleteResult(
RemoteCompileResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCompileCompleteResult'],
base: RemoteCompileResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCompileCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-result', 1)
class PollRunCompleteResult(
RemoteRunResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunCompleteResult'],
base: RemoteRunResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
table=base.table,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-operation-result', 1)
class PollRunOperationCompleteResult(
RemoteRunOperationResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunOperationCompleteResult'],
base: RemoteRunOperationResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunOperationCompleteResult':
return cls(
success=base.success,
results=base.results,
generated_at=base.generated_at,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-catalog-result', 1)
class PollCatalogCompleteResult(RemoteCatalogResults, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCatalogCompleteResult'],
base: RemoteCatalogResults,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCatalogCompleteResult':
return cls(
nodes=base.nodes,
sources=base.sources,
generated_at=base.generated_at,
errors=base.errors,
_compile_results=base._compile_results,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-in-progress-result', 1)
class PollInProgressResult(PollResult):
pass
@dataclass
@schema_version('poll-remote-get-manifest-result', 1)
class PollGetManifestResult(GetManifestResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollGetManifestResult'],
base: GetManifestResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollGetManifestResult':
return cls(
manifest=base.manifest,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-freshness-result', 1)
class PollFreshnessResult(RemoteFreshnessResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollFreshnessResult'],
base: RemoteFreshnessResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollFreshnessResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
)
# Manifest parsing types
class ManifestStatus(StrEnum):
Init = 'init'
Compiling = 'compiling'
Ready = 'ready'
Error = 'error'
@dataclass
@schema_version('remote-status-result', 1)
class LastParse(RemoteResult):
state: ManifestStatus = ManifestStatus.Init
logs: List[LogMessage] = field(default_factory=list)
error: Optional[Dict[str, Any]] = None
timestamp: datetime = field(default_factory=datetime.utcnow)
pid: int = field(default_factory=os.getpid)

core/dbt/contracts/sql.py

@@ -0,0 +1,88 @@
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, List, Any, Dict, Sequence
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
ExecutionResult,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.logger import LogMessage
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)


@@ -6,7 +6,8 @@ from typing import List, Optional, Generic, TypeVar
from dbt.clients import system
from dbt.contracts.project import ProjectPackageMetadata
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import DepsSetDownloadDirectory
DOWNLOADS_PATH = None
@@ -31,7 +32,7 @@ def downloads_directory():
remove_downloads = True
system.make_directory(DOWNLOADS_PATH)
logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH))
fire_event(DepsSetDownloadDirectory(path=DOWNLOADS_PATH))
yield DOWNLOADS_PATH


@@ -12,7 +12,8 @@ from dbt.deps.base import PinnedPackage, UnpinnedPackage, get_downloads_path
from dbt.exceptions import (
ExecutableError, warn_or_error, raise_dependency_error
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import EnsureGitInstalled
from dbt import ui
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
@@ -81,11 +82,7 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
)
except ExecutableError as exc:
if exc.cmd and exc.cmd[0] == 'git':
logger.error(
'Make sure git is installed on your machine. More '
'information: '
'https://docs.getdbt.com/docs/package-management'
)
fire_event(EnsureGitInstalled())
raise
return os.path.join(get_downloads_path(), dir_)


@@ -6,7 +6,8 @@ from dbt.contracts.project import (
ProjectPackageMetadata,
LocalPackage,
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import DepsCreatingLocalSymlink, DepsSymlinkNotAvailable
class LocalPackageMixin:
@@ -57,12 +58,11 @@ class LocalPinnedPackage(LocalPackageMixin, PinnedPackage):
system.remove_file(dest_path)
if can_create_symlink:
logger.debug(' Creating symlink to local dependency.')
fire_event(DepsCreatingLocalSymlink())
system.make_symlink(src_path, dest_path)
else:
logger.debug(' Symlinks are not available on this '
'OS, copying dependency.')
fire_event(DepsSymlinkNotAvailable())
shutil.copytree(src_path, dest_path)


@@ -3,7 +3,6 @@ from typing import Dict, List, NoReturn, Union, Type, Iterator, Set
from dbt.exceptions import raise_dependency_error, InternalException
from dbt.context.target import generate_target_context
from dbt.config import Project, RuntimeConfig
from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.deps.base import BasePackage, PinnedPackage, UnpinnedPackage
@@ -126,8 +125,7 @@ def resolve_packages(
pending = PackageListing.from_contracts(packages)
final = PackageListing()
ctx = generate_target_context(config, config.cli_vars)
renderer = DbtProjectYamlRenderer(ctx)
renderer = DbtProjectYamlRenderer(config, config.cli_vars)
while pending:
next_pending = PackageListing()

core/dbt/events/README.md

@@ -0,0 +1,22 @@
# Events Module
The Events module is the implementation of structured logging. These events represent both a programmatic interface to dbt processes and human-readable messaging in one centralized place. The centralization allows mypy to enforce interface invariants across all dbt events, and the distinct type layer decouples events from libraries such as loggers.
# Using the Events Module
The events module provides types in `events.types` that represent what is happening in dbt. These types are intended to be an exhaustive list of everything happening within dbt that needs to be logged, streamed, or printed. To fire an event, call `events.functions::fire_event`, the single entry point to the module from everywhere in dbt.
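For example, an illustrative call site looks like this (the path value below is made up; `DepsSetDownloadDirectory` is one of the event types used elsewhere in this changeset):
```python
from dbt.events.functions import fire_event
from dbt.events.types import DepsSetDownloadDirectory

# fire a typed event instead of formatting a log string at the call site
fire_event(DepsSetDownloadDirectory(path="/tmp/dbt-downloads"))
```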
# Adding a New Event
In `events.types`, add a new class that represents the new event. This may be a simple class with no values, or a dataclass with the values needed to construct downstream messaging. Include only the data necessary to construct this message within the class. You must extend all destinations (e.g., if your log message belongs on the CLI, extend `CliEventABC`) as well as the log level this event belongs to.
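For instance, a hypothetical debug-level CLI event (the class name and field are invented for illustration, using the `Cli` and `DebugLevel` base classes as imported elsewhere in this changeset) might look like this:
```python
from dataclasses import dataclass
from dbt.events.types import Cli, DebugLevel


@dataclass
class DownloadsDirectoryCleaned(DebugLevel, Cli):
    path: str

    def message(self) -> str:
        return f"Cleaned up downloads directory at {self.path}"
```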
# Adapter Maintainers
To integrate existing log messages from adapters, you likely have a line of code like this in your adapter already:
```python
from dbt.logger import GLOBAL_LOGGER as logger
```
Simply change it to these two lines with your adapter's database name, and all your existing call sites will now use the new system for v1.0:
```python
from dbt.events import AdapterLogger
logger = AdapterLogger("<database name>")
# e.g. AdapterLogger("Snowflake")
```


@@ -0,0 +1 @@
from .adapter_endpoint import AdapterLogger # noqa: F401


@@ -0,0 +1,86 @@
from dataclasses import dataclass
from dbt.events.functions import fire_event
from dbt.events.types import (
AdapterEventDebug, AdapterEventInfo, AdapterEventWarning, AdapterEventError
)
from typing import Any
@dataclass
class AdapterLogger():
name: str
def debug(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventDebug(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def info(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventInfo(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def warning(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventWarning(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def error(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def exception(
self,
msg: str,
exc_info: Any = True, # this default is what makes this method different
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.stack_info = stack_info
event.extra = extra
fire_event(event)

core/dbt/events/format.py

@@ -0,0 +1,49 @@
from dbt import ui
from typing import Optional, Union
from dbt.node_types import NodeType
def format_fancy_output_line(
msg: str, status: str, index: Optional[int],
total: Optional[int], execution_time: Optional[float] = None,
truncate: bool = False
) -> str:
if index is None or total is None:
progress = ''
else:
progress = '{} of {} '.format(index, total)
prefix = "{progress}{message}".format(
progress=progress,
message=msg)
truncate_width = ui.printer_width() - 3
justified = prefix.ljust(ui.printer_width(), ".")
if truncate and len(justified) > truncate_width:
justified = justified[:truncate_width] + '...'
if execution_time is None:
status_time = ""
else:
status_time = " in {execution_time:0.2f}s".format(
execution_time=execution_time)
output = "{justified} [{status}{status_time}]".format(
justified=justified, status=status, status_time=status_time)
return output
def _pluralize(string: Union[str, NodeType]) -> str:
try:
convert = NodeType(string)
except ValueError:
return f'{string}s'
else:
return convert.pluralize()
def pluralize(count, string: Union[str, NodeType]):
pluralized: str = str(string)
if count != 1:
pluralized = _pluralize(string)
return f'{count} {pluralized}'


@@ -0,0 +1,212 @@
from colorama import Style
import dbt.events.functions as this # don't worry I hate it too.
from dbt.events.types import Cli, Event, File, ShowException
import dbt.flags as flags
# TODO this will need to move eventually
from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing
import json
import logging
from logging import Logger
from logging.handlers import RotatingFileHandler
import os
from typing import List
# create the global file logger with no configuration
global FILE_LOG
FILE_LOG = logging.getLogger('default_file')
# set up logger to go to stdout with defaults
# setup_event_logger will be called once args have been parsed
global STDOUT_LOG
STDOUT_LOG = logging.getLogger('default_stdout')
STDOUT_LOG.setLevel(logging.INFO)
stdout_handler = logging.StreamHandler()
stdout_handler.setLevel(logging.INFO)
STDOUT_LOG.addHandler(stdout_handler)
format_color = True
format_json = False
def setup_event_logger(log_path):
make_log_dir_if_missing(log_path)
this.format_json = flags.LOG_FORMAT == 'json'
# USE_COLORS can be None if the app just started and the CLI flags
# haven't been applied yet
this.format_color = True if flags.USE_COLORS else False
# TODO this default should live somewhere better
log_dest = os.path.join(log_path, 'dbt.log')
level = logging.DEBUG if flags.DEBUG else logging.INFO
# overwrite the STDOUT_LOG logger with the configured one
this.STDOUT_LOG = logging.getLogger('configured_std_out')
this.STDOUT_LOG.setLevel(level)
FORMAT = "%(message)s"
stdout_passthrough_formatter = logging.Formatter(fmt=FORMAT)
stdout_handler = logging.StreamHandler()
stdout_handler.setFormatter(stdout_passthrough_formatter)
stdout_handler.setLevel(level)
this.STDOUT_LOG.addHandler(stdout_handler)
# overwrite the FILE_LOG logger with the configured one
this.FILE_LOG = logging.getLogger('configured_file')
this.FILE_LOG.setLevel(logging.DEBUG) # always debug regardless of user input
file_passthrough_formatter = logging.Formatter(fmt=FORMAT)
# TODO rotation parameters (maxBytes/backupCount) are not configured for this handler yet
file_handler = RotatingFileHandler(filename=log_dest, encoding='utf8')
file_handler.setFormatter(file_passthrough_formatter)
file_handler.setLevel(logging.DEBUG) # always debug regardless of user input
this.FILE_LOG.addHandler(file_handler)
def env_secrets() -> List[str]:
return [
v for k, v in os.environ.items()
if k.startswith(SECRET_ENV_PREFIX)
]
def scrub_secrets(msg: str, secrets: List[str]) -> str:
scrubbed = msg
for secret in secrets:
scrubbed = scrubbed.replace(secret, "*****")
return scrubbed
# translates an Event to a completely formatted output log_line
# json_fmt=True -> json formatting
# json_fmt=False -> text formatting
# cli_dest=True -> cli formatting
# cli_dest=False -> file formatting
def create_log_line(e: Event, json_fmt: bool, cli_dest: bool) -> str:
level = e.level_tag()
values: dict = {
'pid': e.pid,
'msg': '',
'level': level if len(level) == 5 else f"{level} "
}
if cli_dest and isinstance(e, Cli):
values['msg'] = scrub_secrets(e.cli_msg(), env_secrets())
elif not cli_dest and isinstance(e, File):
values['msg'] = scrub_secrets(e.file_msg(), env_secrets())
if json_fmt:
values['ts'] = e.ts.isoformat()
log_line = json.dumps(values, sort_keys=True)
else:
values['ts'] = e.ts.strftime("%H:%M:%S")
color_tag = '' if this.format_color else Style.RESET_ALL
log_line = f"{color_tag}{values['ts']} | [ {values['level']} ] | {values['msg']}"
return log_line
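# For illustration, the non-json branch above yields lines shaped like
#   "HH:MM:SS | [ level ] | message", with secrets already scrubbed from the message.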
# allows for reuse of this obnoxious if/else tree.
# do not use for exceptions: it doesn't pass along exc_info, stack_info, or extra
def send_to_logger(l: Logger, level_tag: str, log_line: str):
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(log_line)
elif level_tag == 'debug':
l.debug(log_line)
elif level_tag == 'info':
l.info(log_line)
elif level_tag == 'warn':
l.warning(log_line)
elif level_tag == 'error':
l.error(log_line)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
)
def send_exc_to_logger(
l: Logger,
level_tag: str,
log_line: str,
exc_info=True,
stack_info=False,
extra=False
):
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'debug':
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'info':
l.info(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'warn':
l.warning(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'error':
l.error(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
)
# top-level method for accessing the new eventing system
# this is where all the side effects happen branched by event type
# (i.e. - mutating the event history, printing to stdout, logging
# to files, etc.)
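# Concretely: File events always land in the log file (its handler stays at DEBUG),
# while Cli events are also echoed to stdout, except debug-level Cli messages when the
# --debug flag is off; Cli events that are also ShowException additionally pass along
# exc_info/stack_info/extra to the stdout logger.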
def fire_event(e: Event) -> None:
# TODO manage history in phase 2: EVENT_HISTORY.append(e)
# explicitly checking the debug flag here so that potentially expensive-to-construct
# log messages are not constructed if debug messages are never shown.
# always logs debug level regardless of user input
if isinstance(e, File):
log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=False)
# doesn't send exceptions to exception logger
send_to_logger(FILE_LOG, level_tag=e.level_tag(), log_line=log_line)
if isinstance(e, Cli):
if e.level_tag() == 'debug' and not flags.DEBUG:
return # eat the message in case it was one of the expensive ones
log_line = create_log_line(e, json_fmt=this.format_json, cli_dest=True)
if not isinstance(e, ShowException):
send_to_logger(STDOUT_LOG, level_tag=e.level_tag(), log_line=log_line)
# CliEventABC and ShowException
else:
send_exc_to_logger(
STDOUT_LOG,
level_tag=e.level_tag(),
log_line=log_line,
exc_info=e.exc_info,
stack_info=e.stack_info,
extra=e.extra
)


@@ -0,0 +1,7 @@
from dbt.events.types import Event
from typing import List
# the global history of events for this session
# TODO this is naive and the memory footprint is likely far too large.
EVENT_HISTORY: List[Event] = []

core/dbt/events/stubs.py

@@ -0,0 +1,44 @@
from typing import (
Any,
Optional,
NamedTuple,
)
# N.B.:
# These stubs were autogenerated by stubgen and then hacked
# to pieces to ensure we had something other than "Any" types
# when using external classes to instantiate event subclasses
# in events/types.py.
#
# This goes away when we turn mypy on for everything.
#
# Don't trust them too much at all!
class _ReferenceKey(NamedTuple):
database: Any
schema: Any
identifier: Any
class _CachedRelation:
referenced_by: Any
inner: Any
class AdapterResponse:
code: Optional[str]
rows_affected: Optional[int]
class BaseRelation:
path: Any
type: Optional[Any]
quote_character: str
include_policy: Any
quote_policy: Any
dbt_created: bool
class InformationSchema(BaseRelation):
information_schema_view: Optional[str]


@@ -0,0 +1,66 @@
from dataclasses import dataclass
from .types import (
InfoLevel,
DebugLevel,
WarnLevel,
ErrorLevel,
ShowException,
Cli
)
# Keeping log messages for testing separate since they are used for debugging.
# Reuse the existing messages when adding logs to tests.
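# For example, an integration test call site would fire one of these via fire_event, e.g.
#   fire_event(IntegrationTestDebug(msg="created the test schema"))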
@dataclass
class IntegrationTestInfo(InfoLevel, Cli):
msg: str
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestDebug(DebugLevel, Cli):
msg: str
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestWarn(WarnLevel, Cli):
msg: str
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestError(ErrorLevel, Cli):
msg: str
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestException(ShowException, ErrorLevel, Cli):
msg: str
def message(self) -> str:
return f"Integration Test: {self.msg}"
# Since mypy doesn't run on every file, we need to suggest to mypy that every
# class gets instantiated. But we don't actually want to run this code.
# Making the conditional `if False` would cause mypy to skip it as dead code, so
# we skirt around that by computing something it doesn't check statically.
#
# TODO remove these lines once we run mypy everywhere.
if 1 == 0:
IntegrationTestInfo(msg='')
IntegrationTestDebug(msg='')
IntegrationTestWarn(msg='')
IntegrationTestError(msg='')
IntegrationTestException(msg='')

core/dbt/events/types.py
File diff suppressed because it is too large


@@ -2,7 +2,9 @@ import builtins
import functools
from typing import NoReturn, Optional, Mapping, Any
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.logger import get_secret_env
from dbt.events.functions import fire_event
from dbt.events.types import GeneralWarningMsg, GeneralWarningException
from dbt.node_types import NodeType
from dbt import flags
from dbt.ui import line_wrap_message, warning_tag
@@ -242,6 +244,15 @@ class ValidationException(RuntimeException):
MESSAGE = "Validation Error"
class ParsingException(RuntimeException):
CODE = 10015
MESSAGE = "Parsing Error"
@property
def type(self):
return 'Parsing'
class JSONValidationException(ValidationException):
def __init__(self, typename, errors):
self.typename = typename
@@ -390,6 +401,8 @@ class CommandError(RuntimeException):
super().__init__(message)
self.cwd = cwd
self.cmd = cmd
for secret in get_secret_env():
self.cmd = str(self.cmd).replace(secret, "*****")
self.args = (cwd, cmd, message)
def __str__(self):
@@ -444,6 +457,10 @@ def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationException(msg, node)
def raise_parsing_error(msg, node=None) -> NoReturn:
raise ParsingException(msg, node)
def raise_database_error(msg, node=None) -> NoReturn:
raise DatabaseException(msg, node)
@@ -976,19 +993,14 @@ def warn_or_error(msg, node=None, log_fmt=None):
if flags.WARN_ERROR:
raise_compiler_error(msg, node)
else:
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
fire_event(GeneralWarningMsg(msg=msg, log_fmt=log_fmt))
def warn_or_raise(exc, log_fmt=None):
if flags.WARN_ERROR:
raise exc
else:
msg = str(exc)
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
fire_event(GeneralWarningException(exc=exc, log_fmt=log_fmt))
def warn(msg, node=None):


@@ -17,7 +17,7 @@ PROFILES_DIR = os.path.expanduser(
STRICT_MODE = False # Only here for backwards compatibility
FULL_REFRESH = False # subcommand
STORE_FAILURES = False # subcommand
GREEDY = None # subcommand
EAGER_INDIRECT_SELECTION = True # subcommand
# Global CLI commands
USE_EXPERIMENTAL_PARSER = None
@@ -95,7 +95,7 @@ MP_CONTEXT = _get_context()
def set_from_args(args, user_config):
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, EAGER_INDIRECT_SELECTION, \
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH, \
WHICH
@@ -103,7 +103,7 @@ def set_from_args(args, user_config):
# cli args without user_config or env var option
FULL_REFRESH = getattr(args, 'full_refresh', FULL_REFRESH)
STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
GREEDY = getattr(args, 'greedy', GREEDY)
EAGER_INDIRECT_SELECTION = getattr(args, 'indirect_selection', 'eager') != 'cautious'
WHICH = getattr(args, 'which', WHICH)
# global cli flags with env var and user_config alternatives


@@ -20,12 +20,12 @@ from .selector_spec import (
INTERSECTION_DELIMITER = ','
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*']
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*', 'metric:*']
DEFAULT_EXCLUDES: List[str] = []
def parse_union(
components: List[str], expect_exists: bool, greedy: bool = False
components: List[str], expect_exists: bool, eagerly_expand: bool = True
) -> SelectionUnion:
# turn ['a b', 'c'] -> ['a', 'b', 'c']
raw_specs = itertools.chain.from_iterable(
@@ -36,7 +36,7 @@ def parse_union(
# ['a', 'b', 'c,d'] -> union('a', 'b', intersection('c', 'd'))
for raw_spec in raw_specs:
intersection_components: List[SelectionSpec] = [
SelectionCriteria.from_single_spec(part, greedy=greedy)
SelectionCriteria.from_single_spec(part, eagerly_expand=eagerly_expand)
for part in raw_spec.split(INTERSECTION_DELIMITER)
]
union_components.append(SelectionIntersection(
@@ -52,21 +52,25 @@ def parse_union(
def parse_union_from_default(
raw: Optional[List[str]], default: List[str], greedy: bool = False
raw: Optional[List[str]], default: List[str], eagerly_expand: bool = True
) -> SelectionUnion:
components: List[str]
expect_exists: bool
if raw is None:
return parse_union(components=default, expect_exists=False, greedy=greedy)
return parse_union(components=default, expect_exists=False, eagerly_expand=eagerly_expand)
else:
return parse_union(components=raw, expect_exists=True, greedy=greedy)
return parse_union(components=raw, expect_exists=True, eagerly_expand=eagerly_expand)
def parse_difference(
include: Optional[List[str]], exclude: Optional[List[str]]
) -> SelectionDifference:
included = parse_union_from_default(include, DEFAULT_INCLUDES, greedy=bool(flags.GREEDY))
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
included = parse_union_from_default(
include,
DEFAULT_INCLUDES,
eagerly_expand=flags.EAGER_INDIRECT_SELECTION
)
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, eagerly_expand=True)
return SelectionDifference(components=[included, excluded])
@@ -148,7 +152,7 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
union_def_parts = _get_list_dicts(definition, 'union')
include, exclude = _parse_include_exclude_subdefs(union_def_parts)
union = SelectionUnion(components=include, greedy_warning=False)
union = SelectionUnion(components=include)
if exclude is None:
union.raw = definition
@@ -156,8 +160,7 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
else:
return SelectionDifference(
components=[union, exclude],
raw=definition,
greedy_warning=False
raw=definition
)
@@ -166,7 +169,7 @@ def parse_intersection_definition(
) -> SelectionSpec:
intersection_def_parts = _get_list_dicts(definition, 'intersection')
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
intersection = SelectionIntersection(components=include, greedy_warning=False)
intersection = SelectionIntersection(components=include)
if exclude is None:
intersection.raw = definition
@@ -174,8 +177,7 @@ def parse_intersection_definition(
else:
return SelectionDifference(
components=[intersection, exclude],
raw=definition,
greedy_warning=False
raw=definition
)
@@ -209,7 +211,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
if diff_arg is None:
return base
else:
return SelectionDifference(components=[base, diff_arg], greedy_warning=False)
return SelectionDifference(components=[base, diff_arg])
def parse_from_definition(


@@ -1,6 +1,7 @@
from typing import (
Set, Iterable, Iterator, Optional, NewType
)
from itertools import product
import networkx as nx # type: ignore
from dbt.exceptions import InternalException
@@ -77,17 +78,26 @@ class Graph:
successors.update(self.graph.successors(node))
return successors
def get_subset_graph(self, selected: Iterable[UniqueId]) -> 'Graph':
def get_subset_graph(self, selected: Iterable[UniqueId]) -> "Graph":
"""Create and return a new graph that is a shallow copy of the graph,
but with only the nodes in include_nodes. Transitive edges across
removed nodes are preserved as explicit new edges.
"""
new_graph = nx.algorithms.transitive_closure(self.graph)
new_graph = self.graph.copy()
include_nodes = set(selected)
for node in self:
if node not in include_nodes:
source_nodes = [x for x, _ in new_graph.in_edges(node)]
target_nodes = [x for _, x in new_graph.out_edges(node)]
new_edges = product(source_nodes, target_nodes)
non_cyclic_new_edges = [
(source, target) for source, target in new_edges if source != target
] # removes cyclic refs
new_graph.add_edges_from(non_cyclic_new_edges)
new_graph.remove_node(node)
for node in include_nodes:
@@ -96,6 +106,7 @@ class Graph:
"Couldn't find model '{}' -- does it exist or is "
"it disabled?".format(node)
)
return Graph(new_graph)
def subgraph(self, nodes: Iterable[UniqueId]) -> 'Graph':


@@ -5,7 +5,7 @@ from queue import PriorityQueue
from typing import Dict, Set, List, Generator, Optional
from .graph import UniqueId
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure, ParsedMetric
from dbt.contracts.graph.compiled import GraphMemberNode
from dbt.contracts.graph.manifest import Manifest
from dbt.node_types import NodeType
@@ -47,8 +47,8 @@ class GraphQueue:
node = self.manifest.expect(node_id)
if node.resource_type != NodeType.Model:
return False
# must be a Model - tell mypy this won't be a Source or Exposure
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure))
# must be a Model - tell mypy this won't be a Source or Exposure or Metric
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure, ParsedMetric))
if node.is_ephemeral:
return False
return True


@@ -5,7 +5,8 @@ from .queue import GraphQueue
from .selector_methods import MethodManager
from .selector_spec import SelectionCriteria, SelectionSpec
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.events.functions import fire_event
from dbt.events.types import SelectorReportInvalidSelector
from dbt.node_types import NodeType
from dbt.exceptions import (
InternalException,
@@ -29,24 +30,6 @@ def alert_non_existence(raw_spec, nodes):
)
def alert_unused_nodes(raw_spec, node_names):
summary_nodes_str = ("\n - ").join(node_names[:3])
debug_nodes_str = ("\n - ").join(node_names)
and_more_str = f"\n - and {len(node_names) - 3} more" if len(node_names) > 4 else ""
summary_msg = (
f"\nSome tests were excluded because at least one parent is not selected. "
f"Use the --greedy flag to include them."
f"\n - {summary_nodes_str}{and_more_str}"
)
logger.info(summary_msg)
if len(node_names) > 4:
debug_msg = (
f"Full list of tests that were excluded:"
f"\n - {debug_nodes_str}"
)
logger.debug(debug_msg)
def can_select_indirectly(node):
"""If a node is not selected itself, but its parent(s) are, it may qualify
for indirect selection.
@@ -103,17 +86,17 @@ class NodeSelector(MethodManager):
try:
collected = self.select_included(nodes, spec)
except InvalidSelectorException:
valid_selectors = ", ".join(self.SELECTOR_METHODS)
logger.info(
f"The '{spec.method}' selector specified in {spec.raw} is "
f"invalid. Must be one of [{valid_selectors}]"
)
fire_event(SelectorReportInvalidSelector(
selector_methods=self.SELECTOR_METHODS,
spec_method=spec.method,
raw_spec=spec.raw
))
return set(), set()
neighbors = self.collect_specified_neighbors(spec, collected)
direct_nodes, indirect_nodes = self.expand_selection(
selected=(collected | neighbors),
greedy=spec.greedy
eagerly_expand=spec.eagerly_expand
)
return direct_nodes, indirect_nodes
@@ -185,6 +168,8 @@ class NodeSelector(MethodManager):
return source.config.enabled
elif unique_id in self.manifest.exposures:
return True
elif unique_id in self.manifest.metrics:
return True
node = self.manifest.nodes[unique_id]
return not node.empty and node.config.enabled
@@ -202,6 +187,8 @@ class NodeSelector(MethodManager):
node = self.manifest.sources[unique_id]
elif unique_id in self.manifest.exposures:
node = self.manifest.exposures[unique_id]
elif unique_id in self.manifest.metrics:
node = self.manifest.metrics[unique_id]
else:
raise InternalException(
f'Node {unique_id} not found in the manifest!'
@@ -217,21 +204,20 @@ class NodeSelector(MethodManager):
}
def expand_selection(
self, selected: Set[UniqueId], greedy: bool = False
self, selected: Set[UniqueId], eagerly_expand: bool = True
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
# Test selection can expand to include an implicitly/indirectly selected test.
# In this way, `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, GREEDY and NOT GREEDY.
# Test selection by default expands to include any implicitly/indirectly selected tests.
# `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, EAGER and CAUTIOUS.
#
# GREEDY mode: If ANY parent is selected, select the test. We use this for EXCLUSION.
# EAGER mode: If ANY parent is selected, select the test.
#
# NOT GREEDY mode:
# CAUTIOUS mode:
# - If ALL parents are selected, select the test.
# - If ANY parent is missing, return it separately. We'll keep it around
# for later and see if its other parents show up.
# We use this for INCLUSION.
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
# or by specifying `greedy: true` in a yaml selector
# Users can opt out of inclusive EAGER mode by passing --indirect-selection cautious
# CLI argument or by specifying `indirect_selection: cautious` in a yaml selector
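# For illustration: given a relationships test that depends on model_a and model_b,
# `dbt test -m model_a` selects that test under EAGER (one parent is selected), but under
# CAUTIOUS it is held back unless model_b is selected too.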
direct_nodes = set(selected)
indirect_nodes = set()
@@ -241,7 +227,7 @@ class NodeSelector(MethodManager):
node = self.manifest.nodes[unique_id]
if can_select_indirectly(node):
# should we add it in directly?
if greedy or set(node.depends_on.nodes) <= set(selected):
if eagerly_expand or set(node.depends_on.nodes) <= set(selected):
direct_nodes.add(unique_id)
# if not:
else:
@@ -255,6 +241,10 @@ class NodeSelector(MethodManager):
# Check tests previously selected indirectly to see if ALL their
# parents are now present.
# performance: if identical, skip the processing below
if set(direct_nodes) == set(indirect_nodes):
return direct_nodes
selected = set(direct_nodes)
for unique_id in indirect_nodes:
@@ -278,16 +268,6 @@ class NodeSelector(MethodManager):
selected_nodes, indirect_only = self.select_nodes(spec)
filtered_nodes = self.filter_selection(selected_nodes)
if indirect_only:
filtered_unused_nodes = self.filter_selection(indirect_only)
if filtered_unused_nodes and spec.greedy_warning:
# log anything that didn't make the cut
unused_node_names = []
for unique_id in filtered_unused_nodes:
name = self.manifest.nodes[unique_id].name
unused_node_names.append(name)
alert_unused_nodes(spec, unused_node_names)
return filtered_nodes
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:


@@ -18,6 +18,7 @@ from dbt.contracts.graph.parsed import (
HasTestMetadata,
ParsedSingularTestNode,
ParsedExposure,
ParsedMetric,
ParsedGenericTestNode,
ParsedSourceDefinition,
)
@@ -45,6 +46,7 @@ class MethodName(StrEnum):
ResourceType = 'resource_type'
State = 'state'
Exposure = 'exposure'
Metric = 'metric'
Result = 'result'
@@ -72,7 +74,7 @@ def is_selected_node(fqn: List[str], node_selector: str):
return True
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure]
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure, ParsedMetric]
class SelectorMethod(metaclass=abc.ABCMeta):
@@ -119,13 +121,25 @@ class SelectorMethod(metaclass=abc.ABCMeta):
continue
yield unique_id, exposure
def metric_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ParsedMetric]]:
for key, metric in self.manifest.metrics.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, metric
def all_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
yield from chain(self.parsed_nodes(included_nodes),
self.source_nodes(included_nodes),
self.exposure_nodes(included_nodes))
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
def configurable_nodes(
self,
@@ -137,9 +151,10 @@ class SelectorMethod(metaclass=abc.ABCMeta):
def non_source_nodes(
self,
included_nodes: Set[UniqueId],
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode]]]:
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode, ParsedMetric]]]:
yield from chain(self.parsed_nodes(included_nodes),
self.exposure_nodes(included_nodes))
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
@abc.abstractmethod
def search(
@@ -251,6 +266,33 @@ class ExposureSelectorMethod(SelectorMethod):
yield node
class MetricSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
parts = selector.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_name = parts[0]
elif len(parts) == 2:
target_package, target_name = parts
else:
msg = (
'Invalid metric selector value "{}". Metrics must be of '
'the form ${{metric_name}} or '
'${{metric_package.metric_name}}'
).format(selector)
raise RuntimeException(msg)
for node, real_node in self.metric_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_name not in (real_node.name, SELECTOR_GLOB):
continue
yield node
class PathSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
@@ -512,6 +554,8 @@ class StateSelectorMethod(SelectorMethod):
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
if checker(previous_node, real_node):
yield node
@@ -544,8 +588,10 @@ class MethodManager:
MethodName.Config: ConfigSelectorMethod,
MethodName.TestName: TestNameSelectorMethod,
MethodName.TestType: TestTypeSelectorMethod,
MethodName.ResourceType: ResourceTypeSelectorMethod,
MethodName.State: StateSelectorMethod,
MethodName.Exposure: ExposureSelectorMethod,
MethodName.Metric: MetricSelectorMethod,
MethodName.Result: ResultSelectorMethod,
}


@@ -66,8 +66,7 @@ class SelectionCriteria:
parents_depth: Optional[int]
children: bool
children_depth: Optional[int]
greedy: bool = False
greedy_warning: bool = False # do not raise warning for yaml selectors
eagerly_expand: bool = True
def __post_init__(self):
if self.children and self.childrens_parents:
@@ -105,7 +104,7 @@ class SelectionCriteria:
@classmethod
def selection_criteria_from_dict(
cls, raw: Any, dct: Dict[str, Any], greedy: bool = False
cls, raw: Any, dct: Dict[str, Any], eagerly_expand: bool = True
) -> 'SelectionCriteria':
if 'value' not in dct:
raise RuntimeException(
@@ -115,6 +114,17 @@ class SelectionCriteria:
parents_depth = _match_to_int(dct, 'parents_depth')
children_depth = _match_to_int(dct, 'children_depth')
# If this field is defined in the selector, it overrides the CLI flag
indirect_selection = dct.get('indirect_selection', None)
if indirect_selection:
if indirect_selection in ['eager', 'cautious']:
eagerly_expand = indirect_selection != 'cautious'
else:
raise RuntimeException(
f'indirect_selection value "{indirect_selection}" is not valid!'
)
return cls(
raw=raw,
method=method_name,
@@ -125,7 +135,7 @@ class SelectionCriteria:
parents_depth=parents_depth,
children=bool(dct.get('children')),
children_depth=children_depth,
greedy=(greedy or bool(dct.get('greedy'))),
eagerly_expand=eagerly_expand
)
@classmethod
@@ -146,18 +156,20 @@ class SelectionCriteria:
dct['parents'] = bool(dct.get('parents'))
if 'children' in dct:
dct['children'] = bool(dct.get('children'))
if 'greedy' in dct:
dct['greedy'] = bool(dct.get('greedy'))
return dct
@classmethod
def from_single_spec(cls, raw: str, greedy: bool = False) -> 'SelectionCriteria':
def from_single_spec(cls, raw: str, eagerly_expand: bool = True) -> 'SelectionCriteria':
result = RAW_SELECTOR_PATTERN.match(raw)
if result is None:
# bad spec!
raise RuntimeException(f'Invalid selector spec "{raw}"')
return cls.selection_criteria_from_dict(raw, result.groupdict(), greedy=greedy)
return cls.selection_criteria_from_dict(
raw,
result.groupdict(),
eagerly_expand=eagerly_expand
)
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
@@ -165,12 +177,10 @@ class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
self,
components: Iterable[SelectionSpec],
expect_exists: bool = False,
greedy_warning: bool = True,
raw: Any = None,
):
self.components: List[SelectionSpec] = list(components)
self.expect_exists = expect_exists
self.greedy_warning = greedy_warning
self.raw = raw
def __iter__(self) -> Iterator[SelectionSpec]:


@@ -2,5 +2,6 @@ config-version: 2
name: dbt
version: 1.0
docs-paths: ['docs']
docs-paths: ["docs"]
macro-paths: ["macros"]
test-paths: ["tests"]


@@ -26,7 +26,7 @@ On model pages, you'll see the immediate parents and children of the model you'r
button at the top-right of this lineage pane, you'll be able to see all of the models that are used to build,
or are built from, the model you're exploring.
Once expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the
Once expanded, you'll be able to use the `--select` and `--exclude` model selection syntax to filter the
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).
Note that you can also right-click on models to interactively filter and explore the graph.
@@ -38,6 +38,6 @@ Note that you can also right-click on models to interactively filter and explore
- [What is dbt](https://docs.getdbt.com/docs/overview)?
- Read the [dbt viewpoint](https://docs.getdbt.com/docs/viewpoint)
- [Installation](https://docs.getdbt.com/docs/installation)
- Join the [chat](https://community.getdbt.com/) on Slack for live questions and support.
- Join the [dbt Community](https://www.getdbt.com/community/) for questions and discussion
{% enddocs %}


@@ -0,0 +1,89 @@
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{# helper for adapter-specific implementations of get_columns_in_relation #}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if add_columns and remove_columns }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}


@@ -1,344 +0,0 @@
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}
{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, sql) }}
{%- endmacro %}
{% macro default__create_table_as(temporary, relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
as (
{{ sql }}
);
{% endmacro %}
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro create_view_as(relation, sql) -%}
{{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}
{%- endmacro %}
{% macro default__create_view_as(relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create view {{ relation }} as (
{{ sql }}
);
{% endmacro %}
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}
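{# With a hypothetical source raw.jaffle_shop.orders, loaded_at_field "_etl_loaded_at", and no filter, the default implementation above would render approximately the following (shown with ANSI current_timestamp, since default__current_timestamp must be overridden by each adapter): #}
select
  max(_etl_loaded_at) as max_loaded_at,
  current_timestamp as snapshotted_at
from raw.jaffle_shop.orders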
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if remove_columns | length > 0 }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}
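{# With one hypothetical column to add and one to remove, the default implementation above renders a single statement such as: #}
alter table analytics.orders
  add column order_total numeric,
  drop column legacy_flag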

View File

@@ -0,0 +1,26 @@
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}

View File

@@ -0,0 +1,23 @@
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}
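{# create_indexes reads the model's `indexes` config. A hedged example of that config for an adapter (such as Postgres) that implements get_create_index_sql; the column names are placeholders: #}
{{ config(
    indexes=[
      {'columns': ['order_id'], 'unique': true},
      {'columns': ['customer_id', 'order_date']}
    ]
) }}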

View File

@@ -0,0 +1,65 @@
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

View File

@@ -0,0 +1,33 @@
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}

View File

@@ -0,0 +1,84 @@
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro get_or_create_relation(database, schema, identifier, type) -%}
{{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }}
{% endmacro %}
{% macro default__get_or_create_relation(database, schema, identifier, type) %}
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
{% if target_relation %}
{% do return([true, target_relation]) %}
{% endif %}
{%- set new_relation = api.Relation.create(
database=database,
schema=schema,
identifier=identifier,
type=type
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}
{# a user-friendly interface into adapter.get_relation #}
{% macro load_relation(relation) %}
{% do return(adapter.get_relation(
database=relation.database,
schema=relation.schema,
identifier=relation.identifier
)) -%}
{% endmacro %}
{# not used much, here for backwards compatibility #}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}
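{# A hedged usage sketch: unpack the [exists, relation] pair returned by get_or_create_relation and only build the table if it did not already exist. Identifiers and the trivial select are hypothetical: #}
{% set target_exists, target_relation = get_or_create_relation(
    database=this.database,
    schema=this.schema,
    identifier='my_table',
    type='table') %}
{% if not target_exists %}
  {% do run_query(create_table_as(False, target_relation, "select 1 as id")) %}
{% endif %}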

View File

@@ -0,0 +1,20 @@
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}

View File

@@ -1,4 +1,3 @@
{% macro convert_datetime(date_str, date_fmt) %}
{% set error_msg -%}
@@ -10,6 +9,7 @@
{% endmacro %}
{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt="%Y%m%d", out_fmt="%Y%m%d") %}
{% set end_date_str = start_date_str if end_date_str is none else end_date_str %}
@@ -38,6 +38,7 @@
{{ return(date_list) }}
{% endmacro %}
{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}
{% set partition_range = (raw_partition_date | string).split(",") %}
@@ -54,6 +55,7 @@
{{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}
{% endmacro %}
{% macro py_current_timestring() %}
{% set dt = modules.datetime.datetime.now() %}
{% do return(dt.strftime("%Y%m%d%H%M%S%f")) %}

View File

@@ -1,8 +0,0 @@
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}

View File

@@ -15,6 +15,7 @@
{%- endif -%}
{%- endmacro %}
{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}
{%- set sql = caller() -%}
@@ -28,3 +29,13 @@
{%- endif -%}
{%- endmacro %}
{# a user-friendly interface into statements #}
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}
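{# run_query returns the statement's result as an agate table; a minimal usage sketch: #}
{% set results = run_query("select 1 as id") %}
{% do log(results.columns[0].values(), info=true) %}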

View File

@@ -25,9 +25,3 @@ where value_field not in (
)
{% endmacro %}
{% test accepted_values(model, column_name, values, quote=True) %}
{% set macro = adapter.dispatch('test_accepted_values', 'dbt') %}
{{ macro(model, column_name, values, quote) }}
{% endtest %}

View File

@@ -0,0 +1,8 @@
{% macro default__test_not_null(model, column_name) %}
select *
from {{ model }}
where {{ column_name }} is null
{% endmacro %}

View File

@@ -1,4 +1,3 @@
{% macro default__test_relationships(model, column_name, to, field) %}
with child as (
@@ -22,9 +21,3 @@ left join parent
where parent.to_field is null
{% endmacro %}
{% test relationships(model, column_name, to, field) %}
{% set macro = adapter.dispatch('test_relationships', 'dbt') %}
{{ macro(model, column_name, to, field) }}
{% endtest %}

View File

@@ -11,8 +11,3 @@ having count(*) > 1
{% endmacro %}
{% test unique(model, column_name) %}
{% set macro = adapter.dispatch('test_unique', 'dbt') %}
{{ macro(model, column_name) }}
{% endtest %}

View File

@@ -1,13 +0,0 @@
{% macro default__test_not_null(model, column_name) %}
select *
from {{ model }}
where {{ column_name }} is null
{% endmacro %}
{% test not_null(model, column_name) %}
{% set macro = adapter.dispatch('test_not_null', 'dbt') %}
{{ macro(model, column_name) }}
{% endtest %}

View File

@@ -12,6 +12,7 @@
node: The available node that an alias is being generated for, or none
#}
{% macro generate_alias_name(custom_alias_name=none, node=none) -%}
{% do return(adapter.dispatch('generate_alias_name', 'dbt')(custom_alias_name, node)) %}
{%- endmacro %}

View File

@@ -14,7 +14,7 @@
node: The node the schema is being generated for
#}
{% macro generate_schema_name(custom_schema_name, node) -%}
{% macro generate_schema_name(custom_schema_name=none, node=none) -%}
{{ return(adapter.dispatch('generate_schema_name', 'dbt')(custom_schema_name, node)) }}
{% endmacro %}

View File

@@ -0,0 +1,21 @@
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}
{% macro should_full_refresh() %}
{% set config_full_refresh = config.get('full_refresh') %}
{% if config_full_refresh is none %}
{% set config_full_refresh = flags.FULL_REFRESH %}
{% endif %}
{% do return(config_full_refresh) %}
{% endmacro %}
{% macro should_store_failures() %}
{% set config_store_failures = config.get('store_failures') %}
{% if config_store_failures is none %}
{% set config_store_failures = flags.STORE_FAILURES %}
{% endif %}
{% do return(config_store_failures) %}
{% endmacro %}
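{# A hedged sketch of how a materialization might consult should_full_refresh(); the incremental materialization does something similar when rebuilding an existing relation: #}
{% if existing_relation is not none and should_full_refresh() %}
  {% do adapter.drop_relation(existing_relation) %}
{% endif %}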

View File

@@ -1,83 +0,0 @@
{% macro run_hooks(hooks, inside_transaction=True) %}
{% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}
{% if not inside_transaction and loop.first %}
{% call statement(auto_begin=inside_transaction) %}
commit;
{% endcall %}
{% endif %}
{% set rendered = render(hook.get('sql')) | trim %}
{% if (rendered | length) > 0 %}
{% call statement(auto_begin=inside_transaction) %}
{{ rendered }}
{% endcall %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro column_list(columns) %}
{%- for col in columns %}
{{ col.name }} {% if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
{% macro column_list_for_create_table(columns) %}
{%- for col in columns %}
{{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
{% macro make_hook_config(sql, inside_transaction) %}
{{ tojson({"sql": sql, "transaction": inside_transaction}) }}
{% endmacro %}
{% macro before_begin(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro in_transaction(sql) %}
{{ make_hook_config(sql, inside_transaction=True) }}
{% endmacro %}
{% macro after_commit(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}
{% macro load_relation(relation) %}
{% do return(adapter.get_relation(
database=relation.database,
schema=relation.schema,
identifier=relation.identifier
)) -%}
{% endmacro %}
{% macro should_full_refresh() %}
{% set config_full_refresh = config.get('full_refresh') %}
{% if config_full_refresh is none %}
{% set config_full_refresh = flags.FULL_REFRESH %}
{% endif %}
{% do return(config_full_refresh) %}
{% endmacro %}
{% macro should_store_failures() %}
{% set config_store_failures = config.get('store_failures') %}
{% if config_store_failures is none %}
{% set config_store_failures = flags.STORE_FAILURES %}
{% endif %}
{% do return(config_store_failures) %}
{% endmacro %}

View File

@@ -0,0 +1,35 @@
{% macro run_hooks(hooks, inside_transaction=True) %}
{% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}
{% if not inside_transaction and loop.first %}
{% call statement(auto_begin=inside_transaction) %}
commit;
{% endcall %}
{% endif %}
{% set rendered = render(hook.get('sql')) | trim %}
{% if (rendered | length) > 0 %}
{% call statement(auto_begin=inside_transaction) %}
{{ rendered }}
{% endcall %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro make_hook_config(sql, inside_transaction) %}
{{ tojson({"sql": sql, "transaction": inside_transaction}) }}
{% endmacro %}
{% macro before_begin(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro in_transaction(sql) %}
{{ make_hook_config(sql, inside_transaction=True) }}
{% endmacro %}
{% macro after_commit(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}

View File

@@ -1,21 +0,0 @@
{% macro incremental_upsert(tmp_relation, target_relation, unique_key=none, statement_name="main") %}
{%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
{%- if unique_key is not none -%}
delete
from {{ target_relation }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ tmp_relation }}
);
{%- endif %}
insert into {{ target_relation }} ({{ dest_cols_csv }})
(
select {{ dest_cols_csv }}
from {{ tmp_relation }}
);
{%- endmacro %}

View File

@@ -0,0 +1,52 @@
/* {#
Helper macros for internal use with incremental materializations.
Use with care if calling elsewhere.
#} */
{% macro get_quoted_csv(column_names) %}
{% set quoted = [] %}
{% for col in column_names -%}
{%- do quoted.append(adapter.quote(col)) -%}
{%- endfor %}
{%- set dest_cols_csv = quoted | join(', ') -%}
{{ return(dest_cols_csv) }}
{% endmacro %}
{% macro diff_columns(source_columns, target_columns) %}
{% set result = [] %}
{% set source_names = source_columns | map(attribute = 'column') | list %}
{% set target_names = target_columns | map(attribute = 'column') | list %}
{# --check whether the name attribute exists in the target - this does not perform a data type check #}
{% for sc in source_columns %}
{% if sc.name not in target_names %}
{{ result.append(sc) }}
{% endif %}
{% endfor %}
{{ return(result) }}
{% endmacro %}
{% macro diff_column_data_types(source_columns, target_columns) %}
{% set result = [] %}
{% for sc in source_columns %}
{% set tc = target_columns | selectattr("name", "equalto", sc.name) | list | first %}
{% if tc %}
{% if sc.data_type != tc.data_type %}
{{ result.append( { 'column_name': tc.name, 'new_type': sc.data_type } ) }}
{% endif %}
{% endif %}
{% endfor %}
{{ return(result) }}
{% endmacro %}
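{# Illustrative use of the helpers above when reconciling a temp relation against its target, mirroring how check_for_schema_changes and the merge macros call them; the relation variables are placeholders: #}
{% set source_columns = adapter.get_columns_in_relation(tmp_relation) %}
{% set target_columns = adapter.get_columns_in_relation(target_relation) %}
{% set columns_to_add = diff_columns(source_columns, target_columns) %}
{% set type_changes = diff_column_data_types(source_columns, target_columns) %}
{% set insert_cols_csv = get_quoted_csv(source_columns | map(attribute="name")) %}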

View File

@@ -53,8 +53,12 @@
{% do adapter.expand_target_column_types(
from_relation=tmp_relation,
to_relation=target_relation) %}
{% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}
{#-- Process schema changes. Returns dict of changes if successful. Use source columns for upserting/merging --#}
{% set dest_columns = process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% if not dest_columns %}
{% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}
{% endif %}
{% set build_sql = get_delete_insert_merge_sql(target_relation, tmp_relation, unique_key, dest_columns) %}
{% endif %}

View File

@@ -1,20 +1,7 @@
{% macro get_merge_sql(target, source, unique_key, dest_columns, predicates=none) -%}
{{ adapter.dispatch('get_merge_sql', 'dbt')(target, source, unique_key, dest_columns, predicates) }}
{%- endmacro %}
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns) }}
{%- endmacro %}
{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}
{{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}
{%- endmacro %}
{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}
{%- set predicates = [] if predicates is none else [] + predicates -%}
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
@@ -52,18 +39,11 @@
{% endmacro %}
{% macro get_quoted_csv(column_names) %}
{% set quoted = [] %}
{% for col in column_names -%}
{%- do quoted.append(adapter.quote(col)) -%}
{%- endfor %}
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns) }}
{%- endmacro %}
{%- set dest_cols_csv = quoted | join(', ') -%}
{{ return(dest_cols_csv) }}
{% endmacro %}
{% macro common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
@@ -83,10 +63,10 @@
{%- endmacro %}
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) }}
{% endmacro %}
{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}
{{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}
{%- endmacro %}
{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}
{%- set predicates = [] if predicates is none else [] + predicates -%}

View File

@@ -15,39 +15,6 @@
{% endmacro %}
{% macro diff_columns(source_columns, target_columns) %}
{% set result = [] %}
{% set source_names = source_columns | map(attribute = 'column') | list %}
{% set target_names = target_columns | map(attribute = 'column') | list %}
{# --check whether the name attribute exists in the target - this does not perform a data type check #}
{% for sc in source_columns %}
{% if sc.name not in target_names %}
{{ result.append(sc) }}
{% endif %}
{% endfor %}
{{ return(result) }}
{% endmacro %}
{% macro diff_column_data_types(source_columns, target_columns) %}
{% set result = [] %}
{% for sc in source_columns %}
{% set tc = target_columns | selectattr("name", "equalto", sc.name) | list | first %}
{% if tc %}
{% if sc.data_type != tc.data_type %}
{{ result.append( { 'column_name': tc.name, 'new_type': sc.data_type } ) }}
{% endif %}
{% endif %}
{% endfor %}
{{ return(result) }}
{% endmacro %}
{% macro check_for_schema_changes(source_relation, target_relation) %}
@@ -57,7 +24,7 @@
{%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set source_not_in_target = diff_columns(source_columns, target_columns) -%}
{%- set target_not_in_source = diff_columns(target_columns, source_columns) -%}
{% set new_target_types = diff_column_data_types(source_columns, target_columns) %}
{% if source_not_in_target != [] %}
@@ -72,6 +39,8 @@
'schema_changed': schema_changed,
'source_not_in_target': source_not_in_target,
'target_not_in_source': target_not_in_source,
'source_columns': source_columns,
'target_columns': target_columns,
'new_target_types': new_target_types
} %}
@@ -132,7 +101,11 @@
{% macro process_schema_changes(on_schema_change, source_relation, target_relation) %}
{% if on_schema_change != 'ignore' %}
{% if on_schema_change == 'ignore' %}
{{ return({}) }}
{% else %}
{% set schema_changes_dict = check_for_schema_changes(source_relation, target_relation) %}
@@ -158,6 +131,8 @@
{% endif %}
{% endif %}
{{ return(schema_changes_dict['source_columns']) }}
{% endif %}

View File

@@ -0,0 +1,25 @@
{% macro get_create_table_as_sql(temporary, relation, sql) -%}
{{ adapter.dispatch('get_create_table_as_sql', 'dbt')(temporary, relation, sql) }}
{%- endmacro %}
{% macro default__get_create_table_as_sql(temporary, relation, sql) -%}
{{ return(create_table_as(temporary, relation, sql)) }}
{% endmacro %}
/* {# keep logic under old macro name for backwards compatibility #} */
{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, sql) }}
{%- endmacro %}
{% macro default__create_table_as(temporary, relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
as (
{{ sql }}
);
{%- endmacro %}

View File

@@ -43,7 +43,7 @@
-- build model
{% call statement('main') -%}
{{ create_table_as(False, intermediate_relation, sql) }}
{{ get_create_table_as_sql(False, intermediate_relation, sql) }}
{%- endcall %}
-- cleanup

View File

@@ -1,14 +1,4 @@
{% macro handle_existing_table(full_refresh, old_relation) %}
{{ adapter.dispatch('handle_existing_table', 'dbt')(full_refresh, old_relation) }}
{% endmacro %}
{% macro default__handle_existing_table(full_refresh, old_relation) %}
{{ log("Dropping relation " ~ old_relation ~ " because it is of type " ~ old_relation.type) }}
{{ adapter.drop_relation(old_relation) }}
{% endmacro %}
{# /*
/* {#
Core materialization implementation. BigQuery and Snowflake are similar
because both can use `create or replace view` where the resulting view schema
is not necessarily the same as the existing view. On Redshift, this would
@@ -17,8 +7,7 @@
This implementation is superior to the create_temp, swap_with_existing, drop_old
paradigm because transactions don't run DDL queries atomically on Snowflake. By using
`create or replace view`, the materialization becomes atomic in nature.
*/
#}
#} */
{% macro create_or_replace_view() %}
{%- set identifier = model['alias'] -%}
@@ -42,7 +31,7 @@
-- build model
{% call statement('main') -%}
{{ create_view_as(target_relation, sql) }}
{{ get_create_view_as_sql(target_relation, sql) }}
{%- endcall %}
{{ run_hooks(post_hooks) }}

View File

@@ -0,0 +1,22 @@
{% macro get_create_view_as_sql(relation, sql) -%}
{{ adapter.dispatch('get_create_view_as_sql', 'dbt')(relation, sql) }}
{%- endmacro %}
{% macro default__get_create_view_as_sql(relation, sql) -%}
{{ return(create_view_as(relation, sql)) }}
{% endmacro %}
/* {# keep logic under old name for backwards compatibility #} */
{% macro create_view_as(relation, sql) -%}
{{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}
{%- endmacro %}
{% macro default__create_view_as(relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create view {{ relation }} as (
{{ sql }}
);
{%- endmacro %}

View File

@@ -0,0 +1,8 @@
{% macro handle_existing_table(full_refresh, old_relation) %}
{{ adapter.dispatch('handle_existing_table', 'dbt')(full_refresh, old_relation) }}
{% endmacro %}
{% macro default__handle_existing_table(full_refresh, old_relation) %}
{{ log("Dropping relation " ~ old_relation ~ " because it is of type " ~ old_relation.type) }}
{{ adapter.drop_relation(old_relation) }}
{% endmacro %}

View File

@@ -25,6 +25,7 @@
{{ return(sql) }}
{% endmacro %}
{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%}
{{ adapter.dispatch('reset_csv_table', 'dbt')(model, full_refresh, old_relation, agate_table) }}
{%- endmacro %}
@@ -42,6 +43,7 @@
{{ return(sql) }}
{% endmacro %}
{% macro get_binding_char() -%}
{{ adapter.dispatch('get_binding_char', 'dbt')() }}
{%- endmacro %}
@@ -50,6 +52,7 @@
{{ return('%s') }}
{% endmacro %}
{% macro get_batch_size() -%}
{{ return(adapter.dispatch('get_batch_size', 'dbt')()) }}
{%- endmacro %}
@@ -58,6 +61,7 @@
{{ return(10000) }}
{% endmacro %}
{% macro get_seed_column_quoted_csv(model, column_names) %}
{%- set quote_seed_column = model['config'].get('quote_columns', None) -%}
{% set quoted = [] %}
@@ -69,6 +73,7 @@
{{ return(dest_cols_csv) }}
{% endmacro %}
{% macro load_csv_rows(model, agate_table) -%}
{{ adapter.dispatch('load_csv_rows', 'dbt')(model, agate_table) }}
{%- endmacro %}
@@ -110,59 +115,3 @@
{# Return SQL so we can render it out into the compiled files #}
{{ return(statements[0]) }}
{% endmacro %}
{% materialization seed, default %}
{%- set identifier = model['alias'] -%}
{%- set full_refresh_mode = (should_full_refresh()) -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{%- set agate_table = load_agate_table() -%}
{%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% set create_table_sql = "" %}
{% if exists_as_view %}
{{ exceptions.raise_compiler_error("Cannot seed to '{}', it is a view".format(old_relation)) }}
{% elif exists_as_table %}
{% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}
{% else %}
{% set create_table_sql = create_csv_table(model, agate_table) %}
{% endif %}
{% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}
{% set rows_affected = (agate_table.rows | length) %}
{% set sql = load_csv_rows(model, agate_table) %}
{% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}
{{ create_table_sql }};
-- dbt seed --
{{ sql }}
{% endcall %}
{% set target_relation = this.incorporate(type='table') %}
{% do persist_docs(target_relation, model) %}
{% if full_refresh_mode or not exists_as_table %}
{% do create_indexes(target_relation) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{{ return({'relations': [target_relation]}) }}
{% endmaterialization %}

View File

@@ -0,0 +1,55 @@
{% materialization seed, default %}
{%- set identifier = model['alias'] -%}
{%- set full_refresh_mode = (should_full_refresh()) -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{%- set agate_table = load_agate_table() -%}
{%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% set create_table_sql = "" %}
{% if exists_as_view %}
{{ exceptions.raise_compiler_error("Cannot seed to '{}', it is a view".format(old_relation)) }}
{% elif exists_as_table %}
{% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}
{% else %}
{% set create_table_sql = create_csv_table(model, agate_table) %}
{% endif %}
{% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}
{% set rows_affected = (agate_table.rows | length) %}
{% set sql = load_csv_rows(model, agate_table) %}
{% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}
{{ create_table_sql }};
-- dbt seed --
{{ sql }}
{% endcall %}
{% set target_relation = this.incorporate(type='table') %}
{% do persist_docs(target_relation, model) %}
{% if full_refresh_mode or not exists_as_table %}
{% do create_indexes(target_relation) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{{ return({'relations': [target_relation]}) }}
{% endmaterialization %}

View File

@@ -24,6 +24,10 @@
{% macro snapshot_staging_table(strategy, source_sql, target_relation) -%}
{{ adapter.dispatch('snapshot_staging_table', 'dbt')(strategy, source_sql, target_relation) }}
{% endmacro %}
{% macro default__snapshot_staging_table(strategy, source_sql, target_relation) -%}
with snapshot_query as (
@@ -139,7 +143,11 @@
{%- endmacro %}
{% macro build_snapshot_table(strategy, sql) %}
{% macro build_snapshot_table(strategy, sql) -%}
{{ adapter.dispatch('build_snapshot_table', 'dbt')(strategy, sql) }}
{% endmacro %}
{% macro default__build_snapshot_table(strategy, sql) %}
select *,
{{ strategy.scd_id }} as dbt_scd_id,
@@ -153,22 +161,6 @@
{% endmacro %}
{% macro get_or_create_relation(database, schema, identifier, type) %}
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
{% if target_relation %}
{% do return([true, target_relation]) %}
{% endif %}
{%- set new_relation = api.Relation.create(
database=database,
schema=schema,
identifier=identifier,
type=type
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}
{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}
{% set tmp_relation = make_temp_relation(target_relation) %}
@@ -180,103 +172,3 @@
{% do return(tmp_relation) %}
{% endmacro %}
{% materialization snapshot, default %}
{%- set config = model['config'] -%}
{%- set target_table = model.get('alias', model.get('name')) -%}
{%- set strategy_name = config.get('strategy') -%}
{%- set unique_key = config.get('unique_key') %}
{% if not adapter.check_schema_exists(model.database, model.schema) %}
{% do create_schema(model.database, model.schema) %}
{% endif %}
{% set target_relation_exists, target_relation = get_or_create_relation(
database=model.database,
schema=model.schema,
identifier=target_table,
type='table') -%}
{%- if not target_relation.is_table -%}
{% do exceptions.relation_wrong_type(target_relation, 'table') %}
{%- endif -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
{{ run_hooks(pre_hooks, inside_transaction=True) }}
{% set strategy_macro = strategy_dispatch(strategy_name) %}
{% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %}
{% if not target_relation_exists %}
{% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}
{% set final_sql = create_table_as(False, target_relation, build_sql) %}
{% else %}
{{ adapter.valid_snapshot_target(target_relation) }}
{% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}
-- this may no-op if the database does not require column expansion
{% do adapter.expand_target_column_types(from_relation=staging_table,
to_relation=target_relation) %}
{% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% do create_columns(target_relation, missing_columns) %}
{% set source_columns = adapter.get_columns_in_relation(staging_table)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% set quoted_source_columns = [] %}
{% for column in source_columns %}
{% do quoted_source_columns.append(adapter.quote(column.name)) %}
{% endfor %}
{% set final_sql = snapshot_merge_sql(
target = target_relation,
source = staging_table,
insert_cols = quoted_source_columns
)
%}
{% endif %}
{% call statement('main') %}
{{ final_sql }}
{% endcall %}
{% do persist_docs(target_relation, model) %}
{% if not target_relation_exists %}
{% do create_indexes(target_relation) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
{{ adapter.commit() }}
{% if staging_table is defined %}
{% do post_snapshot(staging_table) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{{ return({'relations': [target_relation]}) }}
{% endmaterialization %}

View File

@@ -0,0 +1,98 @@
{% materialization snapshot, default %}
{%- set config = model['config'] -%}
{%- set target_table = model.get('alias', model.get('name')) -%}
{%- set strategy_name = config.get('strategy') -%}
{%- set unique_key = config.get('unique_key') %}
{% if not adapter.check_schema_exists(model.database, model.schema) %}
{% do create_schema(model.database, model.schema) %}
{% endif %}
{% set target_relation_exists, target_relation = get_or_create_relation(
database=model.database,
schema=model.schema,
identifier=target_table,
type='table') -%}
{%- if not target_relation.is_table -%}
{% do exceptions.relation_wrong_type(target_relation, 'table') %}
{%- endif -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
{{ run_hooks(pre_hooks, inside_transaction=True) }}
{% set strategy_macro = strategy_dispatch(strategy_name) %}
{% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %}
{% if not target_relation_exists %}
{% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}
{% set final_sql = create_table_as(False, target_relation, build_sql) %}
{% else %}
{{ adapter.valid_snapshot_target(target_relation) }}
{% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}
-- this may no-op if the database does not require column expansion
{% do adapter.expand_target_column_types(from_relation=staging_table,
to_relation=target_relation) %}
{% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% do create_columns(target_relation, missing_columns) %}
{% set source_columns = adapter.get_columns_in_relation(staging_table)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% set quoted_source_columns = [] %}
{% for column in source_columns %}
{% do quoted_source_columns.append(adapter.quote(column.name)) %}
{% endfor %}
{% set final_sql = snapshot_merge_sql(
target = target_relation,
source = staging_table,
insert_cols = quoted_source_columns
)
%}
{% endif %}
{% call statement('main') %}
{{ final_sql }}
{% endcall %}
{% do persist_docs(target_relation, model) %}
{% if not target_relation_exists %}
{% do create_indexes(target_relation) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
{{ adapter.commit() }}
{% if staging_table is defined %}
{% do post_snapshot(staging_table) %}
{% endif %}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{{ return({'relations': [target_relation]}) }}
{% endmaterialization %}

View File

@@ -39,7 +39,6 @@
{{ adapter.dispatch('snapshot_hash_arguments', 'dbt')(args) }}
{%- endmacro %}
{% macro default__snapshot_hash_arguments(args) -%}
md5({%- for arg in args -%}
coalesce(cast({{ arg }} as varchar ), '')
@@ -97,7 +96,6 @@
{{ adapter.dispatch('snapshot_string_as_time', 'dbt')(timestamp) }}
{%- endmacro %}
{% macro default__snapshot_string_as_time(timestamp) %}
{% do exceptions.raise_not_implemented(
'snapshot_string_as_time macro not implemented for adapter '+adapter.type()

View File

@@ -0,0 +1,14 @@
{% macro get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}
{{ adapter.dispatch('get_test_sql', 'dbt')(main_sql, fail_calc, warn_if, error_if, limit) }}
{%- endmacro %}
{% macro default__get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}
select
{{ fail_calc }} as failures,
{{ fail_calc }} {{ warn_if }} as should_warn,
{{ fail_calc }} {{ error_if }} as should_error
from (
{{ main_sql }}
{{ "limit " ~ limit if limit != none }}
) dbt_internal_test
{%- endmacro %}
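{# For a hypothetical not_null test with fail_calc "count(*)", warn_if and error_if both "!= 0", and no limit, the default implementation above renders roughly: #}
select
  count(*) as failures,
  count(*) != 0 as should_warn,
  count(*) != 0 as should_error
from (
  select * from analytics.orders where order_id is null
) dbt_internal_test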

Some files were not shown because too many files have changed in this diff.