Compare commits


22 Commits

Author SHA1 Message Date
Nathaniel May
1e2bcef0ac use concrete stdlib loggers 2021-11-05 12:28:30 -04:00
Nathaniel May
a546e79d06 inline common processors 2021-11-05 10:38:22 -04:00
Nathaniel May
b8c9555347 logging works now but too many times. 2021-11-05 10:21:23 -04:00
Nathaniel May
2c8f1c9a78 split out loggers into values 2021-11-04 16:55:07 -04:00
Nathaniel May
b1db4d7978 move two log messages to event system 2021-11-03 15:54:53 -04:00
Nathaniel May
d1b9fbb7a3 bug fix for logging 2021-11-03 12:29:43 -04:00
Nathaniel May
29f34769df remove print 2021-11-03 11:57:45 -04:00
Nathaniel May
6ca5fa8f4a first pass at json logging. fails. 2021-11-03 11:57:04 -04:00
Nathaniel May
9febe38781 add adapter logging interface, and change postgres adapter to use it. 2021-11-02 13:53:07 -04:00
Nathaniel May
a517375c6c add comment 2021-11-02 10:47:01 -04:00
Nathaniel May
a9758297d5 make logger global 2021-11-02 10:46:33 -04:00
Nathaniel May
c087d3b2dc failed attempt at file logging 2021-11-01 16:27:01 -04:00
Nathaniel May
55b33031fc use structlog configs 2021-11-01 13:22:02 -04:00
Nathaniel May
593b562611 refactor for cleaner if else tree 2021-11-01 12:21:28 -04:00
Nathaniel May
57d364212d move datetime into event type 2021-11-01 11:32:24 -04:00
Nathaniel May
415cc9c702 add structlog to event module 2021-11-01 10:19:42 -04:00
Nathaniel May
d2f0e2d1e1 Change Graph logger call sites (#4165)
graph call sites for structured logging
2021-10-29 17:08:30 -04:00
Nathaniel May
e29db5897f Client call sites (#4163)
update log call sites with new event system
2021-10-29 16:35:48 -04:00
Nathaniel May
87b8ca9615 Handle exec info (#4168)
handle exec info
2021-10-29 16:01:04 -04:00
Emily Rockman
a3dc5efda7 context call sites (#4164)
* updated context dir to new structured logging
2021-10-29 10:12:09 -05:00
Nathaniel May
1015b89dbf Initial structured logging work with fire_event (#4137)
add event type modeling and fire_event calls
2021-10-29 09:16:06 -04:00
Nathaniel May
5c9fd07050 init 2021-10-26 13:57:30 -04:00
259 changed files with 3830 additions and 10795 deletions
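Taken together, the commits above converge on one pattern: log call sites construct small typed event values and hand them to a central `fire_event` function, which formats them and routes them through concrete stdlib loggers (plain text for the console, JSON when structured output is requested). Below is a minimal sketch of that shape; only the `NewConnection` fields are taken from the connections.py diff further down, while the plumbing itself is a simplified assumption, not dbt-core's real events API.

```python
# Minimal sketch of the event-firing shape described by the commits above.
# NewConnection's fields come from the connections.py diff in this compare;
# fire_event and the JSON envelope are illustrative assumptions.
import json
import logging
from dataclasses import asdict, dataclass
from datetime import datetime, timezone

logging.basicConfig(level=logging.DEBUG, format="%(message)s")
STDOUT_LOGGER = logging.getLogger("default_stdout")  # a concrete stdlib logger


@dataclass
class NewConnection:
    """A typed event; call sites build these instead of formatting strings."""
    conn_name: str
    conn_type: str

    def message(self) -> str:
        return f'Acquiring new {self.conn_type} connection "{self.conn_name}".'


def fire_event(event, log_format: str = "text") -> None:
    """Route one typed event to the logger, as plain text or one JSON line."""
    if log_format == "json":
        envelope = {
            "ts": datetime.now(timezone.utc).isoformat(),
            "type": type(event).__name__,
            "data": asdict(event),
        }
        STDOUT_LOGGER.debug(json.dumps(envelope))
    else:
        STDOUT_LOGGER.debug(event.message())


# A call site replaces logger.debug('Acquiring new {} connection "{}".') with:
fire_event(NewConnection(conn_name="master", conn_type="postgres"))
```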

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.0.0
current_version = 1.0.0b2
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)
@@ -35,5 +35,3 @@ first_value = 1
[bumpversion:file:plugins/postgres/setup.py]
[bumpversion:file:plugins/postgres/dbt/adapters/postgres/__version__.py]
[bumpversion:file:docker/requirements/requirements.txt]

View File

@@ -1,6 +1,6 @@
module.exports = ({ context }) => {
const defaultPythonVersion = "3.8";
const supportedPythonVersions = ["3.7", "3.8", "3.9"];
const supportedPythonVersions = ["3.6", "3.7", "3.8", "3.9"];
const supportedAdapters = ["postgres"];
// if PR, generate matrix based on files changed and PR labels

View File

@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.6, 3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
env:
TOXENV: "unit"
@@ -167,7 +167,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.7, 3.8, 3.9]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- name: Set up Python ${{ matrix.python-version }}
@@ -198,9 +198,8 @@ jobs:
dbt --version
- name: Install source distributions
# ignore dbt-1.0.0, which intentionally raises an error when installed from source
run: |
find ./dist/dbt-[a-z]*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |

View File

@@ -1,200 +0,0 @@
# **what?**
# Take the given commit, run unit tests specifically on that sha, build and
# package it, and then release to GitHub and PyPi with that specific build
# **why?**
# Ensure an automated and tested release process
# **when?**
# This will only run manually with a given sha and version
name: Release to GitHub and PyPi
on:
workflow_dispatch:
inputs:
sha:
description: 'The last commit sha in the release'
required: true
version_number:
description: 'The release version number (i.e. 1.0.0b1)'
required: true
defaults:
run:
shell: bash
jobs:
unit:
name: Unit test
runs-on: ubuntu-latest
env:
TOXENV: "unit"
steps:
- name: Check out the repository
uses: actions/checkout@v2
with:
persist-credentials: false
ref: ${{ github.event.inputs.sha }}
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install tox
pip --version
tox --version
- name: Run tox
run: tox
build:
name: build packages
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v2
with:
persist-credentials: false
ref: ${{ github.event.inputs.sha }}
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install --upgrade setuptools wheel twine check-wheel-contents
pip --version
- name: Build distributions
run: ./scripts/build-dist.sh
- name: Show distributions
run: ls -lh dist/
- name: Check distribution descriptions
run: |
twine check dist/*
- name: Check wheel contents
run: |
check-wheel-contents dist/*.whl --ignore W007,W008
- uses: actions/upload-artifact@v2
with:
name: dist
path: |
dist/
!dist/dbt-${{github.event.inputs.version_number}}.tar.gz
test-build:
name: verify packages
needs: [build, unit]
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install --upgrade wheel
pip --version
- uses: actions/download-artifact@v2
with:
name: dist
path: dist/
- name: Show distributions
run: ls -lh dist/
- name: Install wheel distributions
run: |
find ./dist/*.whl -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check wheel distributions
run: |
dbt --version
- name: Install source distributions
run: |
find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |
dbt --version
github-release:
name: GitHub Release
needs: test-build
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v2
with:
name: dist
path: '.'
# Need to set an output variable because env variables can't be taken as input
# This is needed for the next step with releasing to GitHub
- name: Find release type
id: release_type
env:
IS_PRERELEASE: ${{ contains(github.event.inputs.version_number, 'rc') || contains(github.event.inputs.version_number, 'b') }}
run: |
echo ::set-output name=isPrerelease::$IS_PRERELEASE
- name: Creating GitHub Release
uses: softprops/action-gh-release@v1
with:
name: dbt-core v${{github.event.inputs.version_number}}
tag_name: v${{github.event.inputs.version_number}}
prerelease: ${{ steps.release_type.outputs.isPrerelease }}
target_commitish: ${{github.event.inputs.sha}}
body: |
[Release notes](https://github.com/dbt-labs/dbt-core/blob/main/CHANGELOG.md)
files: |
dbt_postgres-${{github.event.inputs.version_number}}-py3-none-any.whl
dbt_core-${{github.event.inputs.version_number}}-py3-none-any.whl
dbt-postgres-${{github.event.inputs.version_number}}.tar.gz
dbt-core-${{github.event.inputs.version_number}}.tar.gz
pypi-release:
name: Pypi release
runs-on: ubuntu-latest
needs: github-release
environment: PypiProd
steps:
- uses: actions/download-artifact@v2
with:
name: dist
path: 'dist'
- name: Publish distribution to PyPI
uses: pypa/gh-action-pypi-publish@v1.4.2
with:
password: ${{ secrets.PYPI_API_TOKEN }}

View File

@@ -1,71 +0,0 @@
# This Action makes a dbt run to sample json structured logs
# and checks that they conform to the currently documented schema.
#
# If this action fails it either means we have unintentionally deviated
# from our documented structured logging schema, or we need to bump the
# version of our structured logging and add new documentation to
# communicate these changes.
name: Structured Logging Schema Check
on:
push:
branches:
- "main"
- "*.latest"
- "releases/*"
pull_request:
workflow_dispatch:
permissions: read-all
jobs:
# run the performance measurements on the current or default branch
test-schema:
name: Test Log Schema
runs-on: ubuntu-latest
env:
# turns warnings into errors
RUSTFLAGS: "-D warnings"
# points tests to the log file
LOG_DIR: "/home/runner/work/dbt-core/dbt-core/logs"
# tells integration tests to output into json format
DBT_LOG_FORMAT: 'json'
steps:
- name: checkout dev
uses: actions/checkout@v2
with:
persist-credentials: false
- name: Setup Python
uses: actions/setup-python@v2.2.2
with:
python-version: "3.8"
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: install dbt
run: pip install -r dev-requirements.txt -r editable-requirements.txt
- name: Set up postgres
uses: ./.github/actions/setup-postgres-linux
- name: ls
run: ls
# integration tests generate a ton of logs in different files. the next step will find them all.
# we actually care if these pass, because the normal test run doesn't usually include many json log outputs
- name: Run integration tests
run: tox -e py38-postgres -- -nauto
# apply our schema tests to every log event from the previous step
# skips any output that isn't valid json
- uses: actions-rs/cargo@v1
with:
command: run
args: --manifest-path test/interop/log_parsing/Cargo.toml
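The deleted workflow above hands the actual validation to a Rust crate at `test/interop/log_parsing`: it walks every JSON-formatted log file produced by the integration tests, skips output that is not valid JSON, and checks each event against the documented schema. The following is only a rough Python illustration of that idea; the expected top-level field names are assumptions for the sketch, not the documented schema.

```python
# Hypothetical illustration of the log-schema check; the real check is the
# Rust crate at test/interop/log_parsing, and EXPECTED_KEYS is an assumption.
import json
import os
from pathlib import Path

LOG_DIR = Path(os.environ.get("LOG_DIR", "logs"))
EXPECTED_KEYS = {"ts", "level", "msg"}  # assumed minimal top-level fields


def check_log_file(path: Path) -> int:
    """Validate every JSON line in one log file; return how many were checked."""
    validated = 0
    for line in path.read_text().splitlines():
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip any output that isn't valid JSON
        if not isinstance(event, dict):
            continue
        missing = EXPECTED_KEYS - event.keys()
        assert not missing, f"{path}: event is missing fields {missing}"
        validated += 1
    return validated


total = sum(check_log_file(p) for p in LOG_DIR.rglob("*.log"))
print(f"validated {total} JSON log events")
```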

View File

@@ -1,133 +1,4 @@
## dbt-core 1.0.1 (TBD)
### Fixes
- Fix wrong url in the dbt docs overview homepage ([#4442](https://github.com/dbt-labs/dbt-core/pull/4442))
- Fix redefined `status` param of `SQLQueryStatus` to type-check the string it receives, whether that is the `._message` value of an `AdapterResponse` or the `str` value sent by an adapter plugin. ([#4463](https://github.com/dbt-labs/dbt-core/pull/4463#issuecomment-990174166))
- Fix `DepsStartPackageInstall` event to use package name instead of version number. ([#4482](https://github.com/dbt-labs/dbt-core/pull/4482))
Contributors:
- [remoyson](https://github.com/remoyson) ([#4442](https://github.com/dbt-labs/dbt-core/pull/4442))
## dbt-core 1.0.0 (December 3, 2021)
### Fixes
- Configure the CLI logger destination to use stdout instead of stderr ([#4368](https://github.com/dbt-labs/dbt-core/pull/4368))
- Make the size of `EVENT_HISTORY` configurable, via `EVENT_BUFFER_SIZE` global config ([#4411](https://github.com/dbt-labs/dbt-core/pull/4411), [#4416](https://github.com/dbt-labs/dbt-core/pull/4416))
- Change type of `log_format` in `profiles.yml` user config to be string, not boolean ([#4394](https://github.com/dbt-labs/dbt-core/pull/4394))
### Under the hood
- Only log cache events if `LOG_CACHE_EVENTS` is enabled, and disable by default. This restores previous behavior ([#4369](https://github.com/dbt-labs/dbt-core/pull/4369))
- Move event codes to be a top-level attribute of JSON-formatted logs, rather than nested in `data` ([#4381](https://github.com/dbt-labs/dbt-core/pull/4381))
- Fix failing integration test on Windows ([#4380](https://github.com/dbt-labs/dbt-core/pull/4380))
- Clean up warning messages for `clean` + `deps` ([#4366](https://github.com/dbt-labs/dbt-core/pull/4366))
- Use RFC3339 timestamps for log messages ([#4384](https://github.com/dbt-labs/dbt-core/pull/4384))
- Different text output for console (info) and file (debug) logs ([#4379](https://github.com/dbt-labs/dbt-core/pull/4379), [#4418](https://github.com/dbt-labs/dbt-core/pull/4418))
- Remove unused events. More structured `ConcurrencyLine`. Replace `\n` message starts/ends with `EmptyLine` events, and exclude `EmptyLine` from JSON-formatted output ([#4388](https://github.com/dbt-labs/dbt-core/pull/4388))
- Update `events` module README ([#4395](https://github.com/dbt-labs/dbt-core/pull/4395))
- Rework approach to JSON serialization for events with non-standard properties ([#4396](https://github.com/dbt-labs/dbt-core/pull/4396))
- Update legacy logger file name to `dbt.log.legacy` ([#4402](https://github.com/dbt-labs/dbt-core/pull/4402))
- Rollover `dbt.log` at 10 MB, and keep up to 5 backups, restoring previous behavior ([#4405](https://github.com/dbt-labs/dbt-core/pull/4405))
- Use reference keys instead of full relation objects in cache events ([#4410](https://github.com/dbt-labs/dbt-core/pull/4410))
- Add `node_type` contextual info to more events ([#4378](https://github.com/dbt-labs/dbt-core/pull/4378))
- Make `materialized` config optional in `node_type` ([#4417](https://github.com/dbt-labs/dbt-core/pull/4417))
- Stringify exception in `GenericExceptionOnRun` to support JSON serialization ([#4424](https://github.com/dbt-labs/dbt-core/pull/4424))
- Add "interop" tests for machine consumption of structured log output ([#4327](https://github.com/dbt-labs/dbt-core/pull/4327))
- Relax version specifier for `dbt-extractor` to `~=0.4.0`, to support compiled wheels for additional architectures when available ([#4427](https://github.com/dbt-labs/dbt-core/pull/4427))
## dbt-core 1.0.0rc3 (November 30, 2021)
### Fixes
- Support partial parsing of env_vars in metrics ([#4293](https://github.com/dbt-labs/dbt-core/issues/4293), [#4322](https://github.com/dbt-labs/dbt-core/pull/4322))
- Fix typo in `UnparsedSourceDefinition.__post_serialize__` ([#3545](https://github.com/dbt-labs/dbt-core/issues/3545), [#4349](https://github.com/dbt-labs/dbt-core/pull/4349))
### Under the hood
- Change some CompilationExceptions to ParsingExceptions ([#4254](https://github.com/dbt-labs/dbt-core/issues/4254), [#4328](https://github.com/dbt-labs/dbt-core/pull/4328))
- Reorder logic for static parser sampling to speed up model parsing ([#4332](https://github.com/dbt-labs/dbt-core/pull/4332))
- Use more augmented assignment statements ([#4315](https://github.com/dbt-labs/dbt-core/issues/4315), [#4331](https://github.com/dbt-labs/dbt-core/pull/4331))
- Adjust logic when finding approximate matches for models and tests ([#3835](https://github.com/dbt-labs/dbt-core/issues/3835), [#4076](https://github.com/dbt-labs/dbt-core/pull/4076))
- Restore small previous behaviors for logging: JSON formatting for first few events; `WARN`-level stdout for `list` task; include tracking events in `dbt.log` ([#4341](https://github.com/dbt-labs/dbt-core/pull/4341))
Contributors:
- [@sarah-weatherbee](https://github.com/sarah-weatherbee) ([#4331](https://github.com/dbt-labs/dbt-core/pull/4331))
- [@emilieschario](https://github.com/emilieschario) ([#4076](https://github.com/dbt-labs/dbt-core/pull/4076))
- [@sneznaj](https://github.com/sneznaj) ([#4349](https://github.com/dbt-labs/dbt-core/pull/4349))
## dbt-core 1.0.0rc2 (November 22, 2021)
### Breaking changes
- Restrict secret env vars (prefixed `DBT_ENV_SECRET_`) to `profiles.yml` + `packages.yml` _only_. Raise an exception if a secret env var is used elsewhere ([#4310](https://github.com/dbt-labs/dbt-core/issues/4310), [#4311](https://github.com/dbt-labs/dbt-core/pull/4311))
- Reorder arguments to `config.get()` so that `default` is second ([#4273](https://github.com/dbt-labs/dbt-core/issues/4273), [#4297](https://github.com/dbt-labs/dbt-core/pull/4297))
### Features
- Avoid error when missing column in YAML description ([#4151](https://github.com/dbt-labs/dbt-core/issues/4151), [#4285](https://github.com/dbt-labs/dbt-core/pull/4285))
- Allow `--defer` flag to `dbt snapshot` ([#4110](https://github.com/dbt-labs/dbt-core/issues/4110), [#4296](https://github.com/dbt-labs/dbt-core/pull/4296))
- Install prerelease packages when `version` explicitly references a prerelease version, regardless of `install-prerelease` status ([#4243](https://github.com/dbt-labs/dbt-core/issues/4243), [#4295](https://github.com/dbt-labs/dbt-core/pull/4295))
- Add data attributes to json log messages ([#4301](https://github.com/dbt-labs/dbt-core/pull/4301))
- Add event codes to all log events ([#4319](https://github.com/dbt-labs/dbt-core/pull/4319))
### Fixes
- Fix serialization error with missing quotes in metrics model ref ([#4252](https://github.com/dbt-labs/dbt-core/issues/4252), [#4287](https://github.com/dbt-labs/dbt-core/pull/4289))
- Correct definition of 'created_at' in ParsedMetric nodes ([#4298](https://github.com/dbt-labs/dbt-core/issues/4298), [#4299](https://github.com/dbt-labs/dbt-core/pull/4299))
- Allow specifying default in Jinja config.get with default keyword ([#4273](https://github.com/dbt-labs/dbt-core/issues/4273), [#4297](https://github.com/dbt-labs/dbt-core/pull/4297))
### Under the hood
- Add --indirect-selection parameter to profiles.yml and builtin DBT_ env vars; stringified parameter to enable multi-modal use ([#3997](https://github.com/dbt-labs/dbt-core/issues/3997), [#4270](https://github.com/dbt-labs/dbt-core/pull/4270))
- Fix filesystem searcher test failure on Python 3.9 ([#3689](https://github.com/dbt-labs/dbt-core/issues/3689), [#4271](https://github.com/dbt-labs/dbt-core/pull/4271))
- Clean up deprecation warnings shown for `dbt_project.yml` config renames ([#4276](https://github.com/dbt-labs/dbt-core/issues/4276), [#4291](https://github.com/dbt-labs/dbt-core/pull/4291))
- Fix metrics count in compiled project stats ([#4290](https://github.com/dbt-labs/dbt-core/issues/4290), [#4292](https://github.com/dbt-labs/dbt-core/pull/4292))
- First pass at supporting more dbt tasks via python lib ([#4200](https://github.com/dbt-labs/dbt-core/pull/4200))
Contributors:
- [@kadero](https://github.com/kadero) ([#4285](https://github.com/dbt-labs/dbt-core/pull/4285), [#4296](https://github.com/dbt-labs/dbt-core/pull/4296))
- [@joellabes](https://github.com/joellabes) ([#4295](https://github.com/dbt-labs/dbt-core/pull/4295))
## dbt-core 1.0.0rc1 (November 10, 2021)
### Breaking changes
- Replace `greedy` flag/property for test selection with `indirect_selection: eager/cautious` flag/property. Set to `eager` by default. **Note:** This reverts test selection to its pre-v0.20 behavior by default. `dbt test -s my_model` _will_ select multi-parent tests, such as `relationships`, that depend on unselected resources. To achieve the behavior change in v0.20 + v0.21, set `--indirect-selection=cautious` on the CLI or `indirect_selection: cautious` in yaml selectors. ([#4082](https://github.com/dbt-labs/dbt-core/issues/4082), [#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- In v1.0.0, **`pip install dbt` will raise an explicit error.** Instead, please use `pip install dbt-<adapter>` (to use dbt with that database adapter), or `pip install dbt-core` (for core functionality). For parity with the previous behavior of `pip install dbt`, you can use: `pip install dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery` ([#4100](https://github.com/dbt-labs/dbt-core/issues/4100), [#4133](https://github.com/dbt-labs/dbt-core/pull/4133))
- Reorganize the `global_project` (macros) into smaller files with clearer names. Remove unused global macros: `column_list`, `column_list_for_create_table`, `incremental_upsert` ([#4154](https://github.com/dbt-labs/dbt-core/pull/4154))
- Introduce structured event interface, and begin conversion of all legacy logging ([#3359](https://github.com/dbt-labs/dbt-core/issues/3359), [#4055](https://github.com/dbt-labs/dbt-core/pull/4055))
- **This is a breaking change for adapter plugins, requiring a very simple migration.** See [`events` module README](core/dbt/events/README.md#adapter-maintainers) for details.
- If you maintain another kind of dbt-core plugin that makes heavy use of legacy logging, and you need time to cut over to the new event interface, you can re-enable the legacy logger via an environment variable shim, `DBT_ENABLE_LEGACY_LOGGER=True`. Be advised that we will remove this capability in a future version of dbt-core.
### Features
- Allow nullable `error_after` in source freshness ([#3874](https://github.com/dbt-labs/dbt-core/issues/3874), [#3955](https://github.com/dbt-labs/dbt-core/pull/3955))
- Add `metrics` nodes ([#4071](https://github.com/dbt-labs/dbt-core/issues/4071), [#4235](https://github.com/dbt-labs/dbt-core/pull/4235))
- Add support for `dbt init <project_name>`, and support for `skip_profile_setup` argument (`dbt init -s`) ([#4156](https://github.com/dbt-labs/dbt-core/issues/4156), [#4249](https://github.com/dbt-labs/dbt-core/pull/4249))
### Fixes
- Changes unit tests using `assertRaisesRegexp` to `assertRaisesRegex` ([#4132](https://github.com/dbt-labs/dbt-core/issues/4132), [#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- Allow retries when the answer from a `dbt deps` is `None` ([#4178](https://github.com/dbt-labs/dbt-core/issues/4178), [#4225](https://github.com/dbt-labs/dbt-core/pull/4225))
### Docs
- Fix non-alphabetical sort of Source Tables in source overview page ([docs#81](https://github.com/dbt-labs/dbt-docs/issues/81), [docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- Add title tag to node elements in tree ([docs#202](https://github.com/dbt-labs/dbt-docs/issues/202), [docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
- Account for test rename: `schema` &rarr; `generic`, `data` &rarr;` singular`. Use `test_metadata` instead of `schema`/`data` tags to differentiate ([docs#216](https://github.com/dbt-labs/dbt-docs/issues/216), [docs#222](https://github.com/dbt-labs/dbt-docs/pull/222))
- Add `metrics` ([core#4235](https://github.com/dbt-labs/dbt-core/issues/4235), [docs#223](https://github.com/dbt-labs/dbt-docs/pull/223))
### Under the hood
- Bump artifact schema versions for 1.0.0: manifest v4, run results v4, sources v3. Notable changes: added `metrics` nodes; schema test + data test nodes are renamed to generic test + singular test nodes; freshness threshold default values ([#4191](https://github.com/dbt-labs/dbt-core/pull/4191))
- Speed up node selection by skipping `incorporate_indirect_nodes` if not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213), [#4214](https://github.com/dbt-labs/dbt-core/issues/4214))
- When `on_schema_change` is set, pass common columns as `dest_columns` in incremental merge macros ([#4144](https://github.com/dbt-labs/dbt-core/issues/4144), [#4170](https://github.com/dbt-labs/dbt-core/pull/4170))
- Clear adapters before registering in `lib` module config generation ([#4218](https://github.com/dbt-labs/dbt-core/pull/4218))
- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#4223](https://github.com/dbt-labs/dbt-core/pull/4223))
Contributors:
- [@kadero](https://github.com/kadero) ([#3955](https://github.com/dbt-labs/dbt-core/pull/3955), [#4249](https://github.com/dbt-labs/dbt-core/pull/4249))
- [@frankcash](https://github.com/frankcash) ([#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- [@Kayrnt](https://github.com/Kayrnt) ([#4170](https://github.com/dbt-labs/dbt-core/pull/4170))
- [@VersusFacit](https://github.com/VersusFacit) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@joellabes](https://github.com/joellabes) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@b-per](https://github.com/b-per) ([#4225](https://github.com/dbt-labs/dbt-core/pull/4225))
- [@salmonsd](https://github.com/salmonsd) ([docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- [@miike](https://github.com/miike) ([docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
## dbt-core 1.0.0 (Release TBD)
## dbt-core 1.0.0b2 (October 25, 2021)
@@ -144,16 +15,12 @@ Contributors:
- `dbt init` is now interactive, generating profiles.yml when run inside an existing project ([#3625](https://github.com/dbt-labs/dbt/pull/3625))
### Under the hood
- Fix intermittent errors in partial parsing tests ([#4060](https://github.com/dbt-labs/dbt-core/issues/4060), [#4068](https://github.com/dbt-labs/dbt-core/pull/4068))
- Make finding disabled nodes more consistent ([#4069](https://github.com/dbt-labs/dbt-core/issues/4069), [#4073](https://github.com/dbt-labs/dbt-core/pull/4073))
- Remove connection from `render_with_context` during parsing, thereby removing misleading log message ([#3137](https://github.com/dbt-labs/dbt-core/issues/3137), [#4062](https://github.com/dbt-labs/dbt-core/pull/4062))
- Wait for postgres docker container to be ready in `setup_db.sh`. ([#3876](https://github.com/dbt-labs/dbt-core/issues/3876), [#3908](https://github.com/dbt-labs/dbt-core/pull/3908))
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Dependency updates ([#4079](https://github.com/dbt-labs/dbt-core/pull/4079), [#3532](https://github.com/dbt-labs/dbt-core/pull/3532))
- Schedule partial parsing for SQL files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4101](https://github.com/dbt-labs/dbt-core/pull/4101))
- Schedule partial parsing for schema files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4162](https://github.com/dbt-labs/dbt-core/pull/4162))
- Skip partial parsing when env_vars change in dbt_project or profile ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4212](https://github.com/dbt-labs/dbt-core/pull/4212))
Contributors:
- [@sungchun12](https://github.com/sungchun12) ([#4017](https://github.com/dbt-labs/dbt/pull/4017))
@@ -187,6 +54,7 @@ Contributors:
- Fixed bug with `error_if` test option ([#4070](https://github.com/dbt-labs/dbt-core/pull/4070))
### Under the hood
- Enact deprecation for `materialization-return` and replace deprecation warning with an exception. ([#3896](https://github.com/dbt-labs/dbt-core/issues/3896))
- Build catalog for only relational, non-ephemeral nodes in the graph ([#3920](https://github.com/dbt-labs/dbt-core/issues/3920))
- Enact deprecation to remove the `release` arg from the `execute_macro` method. ([#3900](https://github.com/dbt-labs/dbt-core/issues/3900))
@@ -199,7 +67,6 @@ Contributors:
- Update the default project paths to be `analysis-paths = ['analyses']` and `test-paths = ['tests']`. Also have starter project set `analysis-paths: ['analyses']` from now on. ([#2659](https://github.com/dbt-labs/dbt-core/issues/2659))
- Define the data type of `sources` as an array of arrays of string in the manifest artifacts. ([#3966](https://github.com/dbt-labs/dbt-core/issues/3966), [#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- Marked `source-paths` and `data-paths` as deprecated keys in `dbt_project.yml` in favor of `model-paths` and `seed-paths` respectively.([#1607](https://github.com/dbt-labs/dbt-core/issues/1607))
- Surface git errors to `stdout` when cloning dbt packages from Github. ([#3167](https://github.com/dbt-labs/dbt-core/issues/3167))
Contributors:
@@ -208,25 +75,14 @@ Contributors:
- [@samlader](https://github.com/samlader) ([#3993](https://github.com/dbt-labs/dbt-core/pull/3993))
- [@yu-iskw](https://github.com/yu-iskw) ([#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- [@laxjesse](https://github.com/laxjesse) ([#4019](https://github.com/dbt-labs/dbt-core/pull/4019))
- [@gitznik](https://github.com/Gitznik) ([#4124](https://github.com/dbt-labs/dbt-core/pull/4124))
## dbt 0.21.1 (Release TBD)
### Fixes
- Add `get_where_subquery` to test macro namespace, fixing custom generic tests that rely on introspecting the `model` arg at parse time ([#4195](https://github.com/dbt-labs/dbt/issues/4195), [#4197](https://github.com/dbt-labs/dbt/pull/4197))
## dbt 0.21.1rc1 (November 03, 2021)
### Fixes
- Performance: Use child_map to find tests for nodes in resolve_graph ([#4012](https://github.com/dbt-labs/dbt/issues/4012), [#4022](https://github.com/dbt-labs/dbt/pull/4022))
- Switch `unique_field` from abstractproperty to optional property. Add docstring ([#4025](https://github.com/dbt-labs/dbt/issues/4025), [#4028](https://github.com/dbt-labs/dbt/pull/4028))
- Include only relational nodes in `database_schema_set` ([#4063](https://github.com/dbt-labs/dbt-core/issues/4063), [#4077](https://github.com/dbt-labs/dbt-core/pull/4077))
- Added support for tests on databases that lack real boolean types. ([#4084](https://github.com/dbt-labs/dbt-core/issues/4084))
- Scrub secrets coming from `CommandError`s so they don't get exposed in logs. ([#4138](https://github.com/dbt-labs/dbt-core/pull/4138))
- Syntax fix in `alter_relation_add_remove_columns` if only removing columns in `on_schema_change: sync_all_columns` ([#4147](https://github.com/dbt-labs/dbt-core/issues/4147))
- Increase performance of graph subset selection ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135),[#4155](https://github.com/dbt-labs/dbt-core/pull/4155))
- Add downstream test edges for `build` task _only_. Restore previous graph construction, compilation performance, and node selection behavior (`test+`) for all other tasks ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135), [#4143](https://github.com/dbt-labs/dbt-core/pull/4143))
- Don't require a strict/proper subset when adding testing edges to specialized graph for `build` ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135), [#4160](https://github.com/dbt-labs/dbt-core/pull/4160))
Contributors:
- [@ljhopkins2](https://github.com/ljhopkins2) ([#4077](https://github.com/dbt-labs/dbt-core/pull/4077))

View File

@@ -10,7 +10,7 @@
## About this document
This document is a guide intended for folks interested in contributing to `dbt-core`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt-core`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
This document is a guide intended for folks interested in contributing to `dbt`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
If you're new to python development or contributing to open-source software, we encourage you to read this document from start to finish. If you get stuck, drop us a line in the `#dbt-core-development` channel on [slack](https://community.getdbt.com).
@@ -20,101 +20,101 @@ If you have an issue or code change suggestion related to a specific database [a
### Signing the CLA
Please note that all contributors to `dbt-core` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt-core` codebase. If you are unable to sign the CLA, then the `dbt-core` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
Please note that all contributors to `dbt` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt` codebase. If you are unable to sign the CLA, then the `dbt` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
## Proposing a change
`dbt-core` is Apache 2.0-licensed open source software. `dbt-core` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
`dbt` is Apache 2.0-licensed open source software. `dbt` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
### Defining the problem
If you have an idea for a new feature or if you've discovered a bug in `dbt-core`, the first step is to open an issue. Please check the list of [open issues](https://github.com/dbt-labs/dbt-core/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt-core` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
If you have an idea for a new feature or if you've discovered a bug in `dbt`, the first step is to open an issue. Please check the list of [open issues](https://github.com/dbt-labs/dbt-core/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
> **Note:** All community-contributed Pull Requests _must_ be associated with an open issue. If you submit a Pull Request that does not pertain to an open issue, you will be asked to create an issue describing the problem before the Pull Request can be reviewed.
### Discussing the idea
After you open an issue, a `dbt-core` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt-core` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
After you open an issue, a `dbt` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
### Submitting a change
If an issue is appropriately well scoped and describes a beneficial change to the `dbt-core` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.
If an issue is appropriately well scoped and describes a beneficial change to the `dbt` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.
The `dbt-core` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/dbt-labs/dbt-core/contribute) page.
The `dbt` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/dbt-labs/dbt-core/contribute) page.
Here's a good workflow:
- Comment on the open issue, expressing your interest in contributing the required code change
- Outline your planned implementation. If you want help getting started, ask!
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt-core` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended, and that it avoids any bugs previously encountered. Currently, the best resource for understanding `dbt-core`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended, and that it avoids any bugs previously encountered. Currently, the best resource for understanding `dbt`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.
In some cases, the right resolution to an open issue might be tangential to the `dbt-core` codebase. The right path forward might be a documentation update or a change that can be made in user-space. In other cases, the issue might describe functionality that the `dbt-core` maintainers are unwilling or unable to incorporate into the `dbt-core` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt-core` repository, the issue will be tagged with the `wontfix` label (see below) and closed.
In some cases, the right resolution to an open issue might be tangential to the `dbt` codebase. The right path forward might be a documentation update or a change that can be made in user-space. In other cases, the issue might describe functionality that the `dbt` maintainers are unwilling or unable to incorporate into the `dbt` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt` repository, the issue will be tagged with the `wontfix` label (see below) and closed.
### Using issue labels
The `dbt-core` maintainers use labels to categorize open issues. Most labels describe the domain in the `dbt-core` codebase germane to the discussion.
The `dbt` maintainers use labels to categorize open issues. Some labels indicate the databases impacted by the issue, while others describe the domain in the `dbt` codebase germane to the discussion. While most of these labels are self-explanatory (eg. `snowflake` or `bigquery`), there are others that are worth describing.
| tag | description |
| --- | ----------- |
| [triage](https://github.com/dbt-labs/dbt-core/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt-core` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/dbt-labs/dbt-core/labels/bug) | This issue represents a defect or regression in `dbt-core` |
| [enhancement](https://github.com/dbt-labs/dbt-core/labels/enhancement) | This issue represents net-new functionality in `dbt-core` |
| [good first issue](https://github.com/dbt-labs/dbt-core/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt-core` codebase to implement. This issue is appropriate for a first-time contributor. |
| [triage](https://github.com/dbt-labs/dbt-core/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/dbt-labs/dbt-core/labels/bug) | This issue represents a defect or regression in `dbt` |
| [enhancement](https://github.com/dbt-labs/dbt-core/labels/enhancement) | This issue represents net-new functionality in `dbt` |
| [good first issue](https://github.com/dbt-labs/dbt-core/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt` codebase to implement. This issue is appropriate for a first-time contributor. |
| [help wanted](https://github.com/dbt-labs/dbt-core/labels/help%20wanted) / [discussion](https://github.com/dbt-labs/dbt-core/labels/discussion) | Conversation around this issue is ongoing, and there isn't yet a clear path forward. Input from community members is most welcome. |
| [duplicate](https://github.com/dbt-labs/dbt-core/issues/duplicate) | This issue is functionally identical to another open issue. The `dbt-core` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/dbt-labs/dbt-core/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt-core` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/dbt-labs/dbt-core/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt-core` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/dbt-labs/dbt-core/labels/wontfix) | This issue does not require a code change in the `dbt-core` repository, or the maintainers are unwilling/unable to merge a Pull Request which implements the behavior described in the issue. |
| [duplicate](https://github.com/dbt-labs/dbt-core/issues/duplicate) | This issue is functionally identical to another open issue. The `dbt` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/dbt-labs/dbt-core/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/dbt-labs/dbt-core/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/dbt-labs/dbt-core/labels/wontfix) | This issue does not require a code change in the `dbt` repository, or the maintainers are unwilling/unable to merge a Pull Request which implements the behavior described in the issue. |
#### Branching Strategy
`dbt-core` has three types of branches:
`dbt` has three types of branches:
- **Trunks** are where active development of the next release takes place. There is one trunk named `main` at the time of writing this, and it will be the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt-core`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt-core`.
- **Trunks** are where active development of the next release takes place. There is one trunk named `develop` at the time of writing this, and it will be the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk branch or a specific release branch.
## Getting the code
### Installing git
You will need `git` in order to download and modify the `dbt-core` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).
You will need `git` in order to download and modify the `dbt` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).
### External contributors
If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt-core` by forking the `dbt-core` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt` by forking the `dbt` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
1. fork the `dbt-core` repository
1. fork the `dbt` repository
2. clone your fork locally
3. check out a new branch for your proposed changes
4. push changes to your fork
5. open a pull request against `dbt-labs/dbt` from your forked repository
### dbt Labs contributors
### Core contributors
If you are a member of the `dbt-labs` GitHub organization, you will have push access to the `dbt-core` repo. Rather than forking `dbt-core` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
If you are a member of the `dbt-labs` GitHub organization, you will have push access to the `dbt` repo. Rather than forking `dbt` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
## Setting up an environment
There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt-core` development, many of these tools are used commonly across open-source python projects.
There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt` development, many of these tools are used commonly across open-source python projects.
### Tools
A short list of tools used in `dbt-core` testing that will be helpful to your understanding:
A short list of tools used in `dbt` testing that will be helpful to your understanding:
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.7, Python 3.8, and Python 3.9
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.6, Python 3.7, Python 3.8, and Python 3.9
- [`pytest`](https://docs.pytest.org/en/latest/) to discover/run tests
- [`make`](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [`flake8`](https://flake8.pycqa.org/en/latest/) for code linting
- [`mypy`](https://mypy.readthedocs.io/en/stable/) for static type checking
- [Github Actions](https://github.com/features/actions)
A deep understanding of these tools is not required to effectively contribute to `dbt-core`, but we recommend checking out the attached documentation if you're interested in learning more about them.
A deep understanding of these tools is not required to effectively contribute to `dbt`, but we recommend checking out the attached documentation if you're interested in learning more about them.
#### virtual environments
We strongly recommend using virtual environments when developing code in `dbt-core`. We recommend creating this virtualenv
in the root of the `dbt-core` repository. To create a new virtualenv, run:
We strongly recommend using virtual environments when developing code in `dbt`. We recommend creating this virtualenv
in the root of the `dbt` repository. To create a new virtualenv, run:
```sh
python3 -m venv env
source env/bin/activate
@@ -135,11 +135,11 @@ For testing, and later in the examples in this document, you may want to have `p
brew install postgresql
```
## Running `dbt-core` in development
## Running `dbt` in development
### Installation
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt-core` (and its dependencies) with:
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt` (and its dependencies) with:
```sh
make dev
@@ -147,24 +147,23 @@ make dev
pip install -r dev-requirements.txt -r editable-requirements.txt
```
When `dbt-core` is installed this way, any changes you make to the `dbt-core` source code will be reflected immediately in your next `dbt-core` run.
When `dbt` is installed this way, any changes you make to the `dbt` source code will be reflected immediately in your next `dbt` run.
### Running `dbt`
### Running `dbt-core`
With your virtualenv activated, the `dbt-core` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as necessary to connect to your target databases. It may be a good idea to add a new profile pointing to a local postgres instance, or a specific test sandbox within your data warehouse if appropriate.
## Testing
Getting the `dbt-core` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt-core`. The section that follows outlines some helpful tips for setting up the test environment.
Getting the `dbt` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt`. The section that follows outlines some helpful tips for setting up the test environment.
Although `dbt-core` works with a number of different databases, you won't need to supply credentials for every one of these databases in your test environment. Instead you can test all dbt-core code changes with Python and Postgres.
Although `dbt` works with a number of different databases, you won't need to supply credentials for every one of these databases in your test environment. Instead you can test all dbt-core code changes with Python and Postgres.
### Initial setup
We recommend starting with `dbt-core`'s Postgres tests. These tests cover most of the functionality in `dbt-core`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
We recommend starting with `dbt`'s Postgres tests. These tests cover most of the functionality in `dbt`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
```sh
make setup-db
@@ -175,6 +174,15 @@ docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```
`dbt` uses test credentials specified in a `test.env` file in the root of the repository for non-Postgres databases. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against `dbt`. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials. This step is only required to use non-Postgres databases.
```
cp test.env.sample test.env
$EDITOR test.env
```
> In general, it's most important to have successful unit and Postgres tests. Once you open a PR, `dbt` will automatically run integration tests for the other three core database adapters. Of course, if you are a BigQuery user, contributing a BigQuery-only feature, it's important to run BigQuery tests as well.
### Test commands
There are a few methods for running tests locally.
@@ -200,9 +208,9 @@ suites.
[`tox`](https://tox.readthedocs.io/en/latest/) takes care of managing virtualenvs and installing dependencies in order to run
tests. You can also run tests in parallel, for example, you can run unit tests
for Python 3.7, Python 3.8, Python 3.9, `flake8` checks, and `mypy` checks in
for Python 3.6, Python 3.7, Python 3.8, `flake8` checks, and `mypy` checks in
parallel with `tox -p`. Also, you can run unit tests for specific python versions
with `tox -e py37`. The configuration for these tests is located in `tox.ini`.
with `tox -e py36`. The configuration for these tests is located in `tox.ini`.
#### `pytest`
@@ -222,8 +230,6 @@ python -m pytest test/unit/test_graph.py::GraphTest::test__dependency_list
dbt Labs provides a CI environment to test changes to specific adapters, and periodic maintenance checks of `dbt-core` through Github Actions. For example, if you submit a pull request to the `dbt-redshift` repo, GitHub will trigger automated code checks and tests against Redshift.
A `dbt-core` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
- First time contributors should note code checks + unit tests require a maintainer to approve.
A `dbt` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
Once all tests are passing and your PR has been approved, a `dbt-core` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:
Once all tests are passing and your PR has been approved, a `dbt` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:

View File

@@ -1,42 +0,0 @@
<p align="center">
<img src="https://raw.githubusercontent.com/dbt-labs/dbt-core/fa1ea14ddfb1d5ae319d5141844910dd53ab2834/etc/dbt-core.svg" alt="dbt logo" width="750"/>
</p>
<p align="center">
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml/badge.svg?event=push" alt="Unit Tests Badge"/>
</a>
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml/badge.svg?event=push" alt="Integration Tests Badge"/>
</a>
</p>
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
![architecture](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-arch.png)
## Understanding dbt
Analysts using dbt can transform their data by simply writing select statements, while dbt handles turning these statements into tables and views in a data warehouse.
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
![dbt dag](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-dag.png)
## Getting started
- [Install dbt](https://docs.getdbt.com/docs/installation)
- Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/)
## Join the dbt Community
- Be part of the conversation in the [dbt Community Slack](http://community.getdbt.com/)
- Read more on the [dbt Community Discourse](https://discourse.getdbt.com)
## Reporting bugs and contributing code
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-core/issues/new)
- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md)
## Code of Conduct
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct).

View File

@@ -18,17 +18,7 @@ from dbt.contracts.graph.manifest import Manifest
from dbt.adapters.base.query_headers import (
MacroQueryStringSetter,
)
from dbt.events.functions import fire_event
from dbt.events.types import (
NewConnection,
ConnectionReused,
ConnectionLeftOpen,
ConnectionLeftOpen2,
ConnectionClosed,
ConnectionClosed2,
Rollback,
RollbackFailed
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import flags
@@ -146,10 +136,14 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
if conn.name == conn_name and conn.state == 'open':
return conn
fire_event(NewConnection(conn_name=conn_name, conn_type=self.TYPE))
logger.debug(
'Acquiring new {} connection "{}".'.format(self.TYPE, conn_name))
if conn.state == 'open':
fire_event(ConnectionReused(conn_name=conn_name))
logger.debug(
'Re-using an available connection from the pool (formerly {}).'
.format(conn.name)
)
else:
conn.handle = LazyHandle(self.open)
@@ -196,9 +190,11 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
with self.lock:
for connection in self.thread_connections.values():
if connection.state not in {'closed', 'init'}:
fire_event(ConnectionLeftOpen(conn_name=connection.name))
logger.debug("Connection '{}' was left open."
.format(connection.name))
else:
fire_event(ConnectionClosed(conn_name=connection.name))
logger.debug("Connection '{}' was properly closed."
.format(connection.name))
self.close(connection)
# garbage collect these connections
@@ -224,17 +220,20 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
try:
connection.handle.rollback()
except Exception:
fire_event(RollbackFailed(conn_name=connection.name))
logger.debug(
'Failed to rollback {}'.format(connection.name),
exc_info=True
)
@classmethod
def _close_handle(cls, connection: Connection) -> None:
"""Perform the actual close operation."""
# On Windows, sometimes connection handles don't have a close() attr.
if hasattr(connection.handle, 'close'):
fire_event(ConnectionClosed2(conn_name=connection.name))
logger.debug(f'On {connection.name}: Close')
connection.handle.close()
else:
fire_event(ConnectionLeftOpen2(conn_name=connection.name))
logger.debug(f'On {connection.name}: No close available on handle')
@classmethod
def _rollback(cls, connection: Connection) -> None:
@@ -245,7 +244,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
f'"{connection.name}", but it does not have one open!'
)
fire_event(Rollback(conn_name=connection.name))
logger.debug(f'On {connection.name}: ROLLBACK')
cls._rollback_handle(connection)
connection.transaction_open = False
@@ -257,7 +256,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
return connection
if connection.transaction_open and connection.handle:
fire_event(Rollback(conn_name=connection.name))
logger.debug('On {}: ROLLBACK'.format(connection.name))
cls._rollback_handle(connection)
connection.transaction_open = False
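The hunks in this file swap free-form `logger.debug(...)` string formatting for typed event objects handed to a single `fire_event()` entry point. A minimal sketch of that shape, using a hypothetical event class and the stdlib logger rather than dbt's actual `dbt.events` modules:
```
from dataclasses import dataclass
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("events-sketch")


@dataclass
class NewConnectionSketch:
    """Hypothetical stand-in for an event type like NewConnection."""
    conn_name: str
    conn_type: str

    def message(self) -> str:
        return f'Acquiring new {self.conn_type} connection "{self.conn_name}".'


def fire_event_sketch(event) -> None:
    # A real implementation would also route structured (e.g. JSON) output;
    # here the event just renders its own message at DEBUG level.
    log.debug(event.message())


fire_event_sketch(NewConnectionSketch(conn_name="master", conn_type="postgres"))
```
The gain over bare `logger.debug` calls is that every log line has a type and named fields, so downstream consumers can filter or serialize events instead of parsing strings.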

View File

@@ -29,8 +29,7 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.manifest import Manifest, MacroManifest
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.exceptions import warn_or_error
from dbt.events.functions import fire_event
from dbt.events.types import CacheMiss, ListRelations
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import filter_null_values, executor
from dbt.adapters.base.connections import Connection, AdapterResponse
@@ -39,7 +38,7 @@ from dbt.adapters.base.relation import (
ComponentName, BaseRelation, InformationSchema, SchemaSearchMap
)
from dbt.adapters.base import Column as BaseColumn
from dbt.adapters.cache import RelationsCache, _make_key
from dbt.adapters.cache import RelationsCache
SeedModel = Union[ParsedSeedNode, CompiledSeedNode]
@@ -289,12 +288,9 @@ class BaseAdapter(metaclass=AdapterMeta):
"""Check if the schema is cached, and by default logs if it is not."""
if (database, schema) not in self.cache:
fire_event(
CacheMiss(
conn_name=self.nice_connection_name(),
database=database,
schema=schema
)
logger.debug(
'On "{}": cache miss for schema "{}.{}", this is inefficient'
.format(self.nice_connection_name(), database, schema)
)
return False
else:
@@ -676,12 +672,9 @@ class BaseAdapter(metaclass=AdapterMeta):
relations = self.list_relations_without_caching(
schema_relation
)
fire_event(ListRelations(
database=database,
schema=schema,
relations=[_make_key(x) for x in relations]
))
logger.debug('with database={}, schema={}, relations={}'
.format(database, schema, relations))
return relations
def _make_match_kwargs(
@@ -975,10 +968,10 @@ class BaseAdapter(metaclass=AdapterMeta):
'dbt could not find a macro with the name "{}" in {}'
.format(macro_name, package_name)
)
# This causes a reference cycle, as generate_runtime_macro_context()
# This causes a reference cycle, as generate_runtime_macro()
# ends up calling get_adapter, so the import has to be here.
from dbt.context.providers import generate_runtime_macro_context
macro_context = generate_runtime_macro_context(
from dbt.context.providers import generate_runtime_macro
macro_context = generate_runtime_macro(
macro=macro,
config=self.config,
manifest=manifest,

View File

@@ -89,10 +89,7 @@ class BaseRelation(FakeAPIObject, Hashable):
if not self._is_exactish_match(k, v):
exact_match = False
if (
self.path.get_lowered_part(k).strip(self.quote_character) !=
v.lower().strip(self.quote_character)
):
if self.path.get_lowered_part(k) != v.lower():
approximate_match = False
if approximate_match and not exact_match:

View File

@@ -1,26 +1,23 @@
import threading
from collections import namedtuple
from copy import deepcopy
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from typing import List, Iterable, Optional, Dict, Set, Tuple, Any
import threading
from dbt.adapters.reference_keys import _make_key, _ReferenceKey
import dbt.exceptions
from dbt.events.functions import fire_event
from dbt.events.types import (
AddLink,
AddRelation,
DropCascade,
DropMissingRelation,
DropRelation,
DumpAfterAddGraph,
DumpAfterRenameSchema,
DumpBeforeAddGraph,
DumpBeforeRenameSchema,
RenameSchema,
TemporaryRelation,
UncachedRelation,
UpdateReference
)
from dbt.logger import CACHE_LOGGER as logger
from dbt.utils import lowercase
import dbt.exceptions
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
def _make_key(relation) -> _ReferenceKey:
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
to keep track of quoting
"""
# databases and schemas can both be None
return _ReferenceKey(lowercase(relation.database),
lowercase(relation.schema),
lowercase(relation.identifier))
def dot_separated(key: _ReferenceKey) -> str:
@@ -160,6 +157,12 @@ class _CachedRelation:
return [dot_separated(r) for r in self.referenced_by]
def lazy_log(msg, func):
if logger.disabled:
return
logger.debug(msg.format(func()))
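The `lazy_log` helper above only builds the (potentially large) cache dump when the cache logger is actually enabled. A small sketch of the same idea against a stdlib logger, with an assumed expensive `dump()` callable:
```
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cache-sketch")


def lazy_log(msg: str, func) -> None:
    # Only pay for func() -- e.g. serializing the whole relations graph --
    # when DEBUG output would actually be emitted.
    if not logger.isEnabledFor(logging.DEBUG):
        return
    logger.debug(msg.format(func()))


def dump() -> dict:
    print("building expensive dump...")
    return {"db.schema.table": []}


lazy_log("before adding: {!s}", dump)  # at INFO level, dump() never runs
```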
class RelationsCache:
"""A cache of the relations known to dbt. Keeps track of relationships
declared between tables and handles renames/drops as a real database would.
@@ -275,7 +278,6 @@ class RelationsCache:
referenced.add_reference(dependent)
# TODO: Is this dead code? I can't seem to find it grepping the codebase.
def add_link(self, referenced, dependent):
"""Add a link between two relations to the database. If either relation
does not exist, it will be added as an "external" relation.
@@ -295,7 +297,11 @@ class RelationsCache:
# if we have not cached the referenced schema at all, we must be
# referring to a table outside our control. There's no need to make
# a link - we will never drop the referenced relation during a run.
fire_event(UncachedRelation(dep_key=dependent, ref_key=ref_key))
logger.debug(
'{dep!s} references {ref!s} but {ref.database}.{ref.schema} '
'is not in the cache, skipping assumed external relation'
.format(dep=dependent, ref=ref_key)
)
return
if ref_key not in self.relations:
# Insert a dummy "external" relation.
@@ -311,7 +317,9 @@ class RelationsCache:
type=referenced.External
)
self.add(dependent)
fire_event(AddLink(dep_key=dep_key, ref_key=ref_key))
logger.debug(
'adding link, {!s} references {!s}'.format(dep_key, ref_key)
)
with self.lock:
self._add_link(ref_key, dep_key)
@@ -322,12 +330,14 @@ class RelationsCache:
:param BaseRelation relation: The underlying relation.
"""
cached = _CachedRelation(relation)
fire_event(AddRelation(relation=_make_key(cached)))
fire_event(DumpBeforeAddGraph(dump=self.dump_graph()))
logger.debug('Adding relation: {!s}'.format(cached))
lazy_log('before adding: {!s}', self.dump_graph)
with self.lock:
self._setdefault(cached)
fire_event(DumpAfterAddGraph(dump=self.dump_graph()))
lazy_log('after adding: {!s}', self.dump_graph)
def _remove_refs(self, keys):
"""Removes all references to all entries in keys. This does not
@@ -349,10 +359,13 @@ class RelationsCache:
:param _CachedRelation dropped: An existing _CachedRelation to drop.
"""
if dropped not in self.relations:
fire_event(DropMissingRelation(relation=dropped))
logger.debug('dropped a nonexistent relationship: {!s}'
.format(dropped))
return
consequences = self.relations[dropped].collect_consequences()
fire_event(DropCascade(dropped=dropped, consequences=consequences))
logger.debug(
'drop {} is cascading to {}'.format(dropped, consequences)
)
self._remove_refs(consequences)
def drop(self, relation):
@@ -367,7 +380,7 @@ class RelationsCache:
:param str identifier: The identifier of the relation to drop.
"""
dropped = _make_key(relation)
fire_event(DropRelation(dropped=dropped))
logger.debug('Dropping relation: {!s}'.format(dropped))
with self.lock:
self._drop_cascade_relation(dropped)
@@ -390,8 +403,9 @@ class RelationsCache:
# update all the relations that refer to it
for cached in self.relations.values():
if cached.is_referenced_by(old_key):
fire_event(
UpdateReference(old_key=old_key, new_key=new_key, cached_key=cached.key())
logger.debug(
'updated reference from {0} -> {2} to {1} -> {2}'
.format(old_key, new_key, cached.key())
)
cached.rename_key(old_key, new_key)
@@ -421,7 +435,10 @@ class RelationsCache:
)
if old_key not in self.relations:
fire_event(TemporaryRelation(key=old_key))
logger.debug(
'old key {} not found in self.relations, assuming temporary'
.format(old_key)
)
return False
return True
@@ -439,9 +456,11 @@ class RelationsCache:
"""
old_key = _make_key(old)
new_key = _make_key(new)
fire_event(RenameSchema(old_key=old_key, new_key=new_key))
logger.debug('Renaming relation {!s} to {!s}'.format(
old_key, new_key
))
fire_event(DumpBeforeRenameSchema(dump=self.dump_graph()))
lazy_log('before rename: {!s}', self.dump_graph)
with self.lock:
if self._check_rename_constraints(old_key, new_key):
@@ -449,7 +468,7 @@ class RelationsCache:
else:
self._setdefault(_CachedRelation(new))
fire_event(DumpAfterRenameSchema(dump=self.dump_graph()))
lazy_log('after rename: {!s}', self.dump_graph)
def get_relations(
self, database: Optional[str], schema: Optional[str]

View File

@@ -8,9 +8,10 @@ from dbt.include.global_project import (
PACKAGE_PATH as GLOBAL_PROJECT_PATH,
PROJECT_NAME as GLOBAL_PROJECT_NAME,
)
from dbt.events.functions import fire_event
from dbt.events.types import AdapterImportError, PluginLoadError
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.contracts.connection import Credentials, AdapterRequiredConfig
from dbt.adapters.protocol import (
AdapterProtocol,
AdapterConfig,
@@ -66,12 +67,11 @@ class AdapterContainer:
# if we failed to import the target module in particular, inform
# the user about it via a runtime error
if exc.name == 'dbt.adapters.' + name:
fire_event(AdapterImportError(exc=exc))
raise RuntimeException(f'Could not find adapter type {name}!')
logger.info(f'Error importing adapter: {exc}')
# otherwise, the error had to have come from some underlying
# library. Log the stack trace.
fire_event(PluginLoadError())
logger.debug('', exc_info=True)
raise
plugin: AdapterPlugin = mod.Plugin
plugin_type = plugin.adapter.type()
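The hunk above separates "the adapter package itself is missing" (raise a friendly error) from "the adapter imported, but one of its own dependencies blew up" (log and re-raise). A sketch of that check against a made-up `example.adapters` namespace rather than dbt's real module path:
```
from importlib import import_module


class RuntimeExceptionSketch(Exception):
    pass


def load_plugin_sketch(name: str):
    module_path = "example.adapters." + name  # hypothetical namespace
    try:
        return import_module(module_path)
    except ModuleNotFoundError as exc:
        if exc.name == module_path:
            # The plugin package itself is not installed.
            raise RuntimeExceptionSketch(f"Could not find adapter type {name}!") from exc
        # Something inside an installed plugin failed to import; surface it as-is.
        raise


try:
    load_plugin_sketch("postgres")
except (RuntimeExceptionSketch, ModuleNotFoundError) as err:
    # With no 'example' package installed at all, the missing top-level
    # package is reported here rather than as the friendly adapter error.
    print(f"import failed: {err}")
```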

View File

@@ -1,24 +0,0 @@
# this module exists to resolve circular imports with the events module
from collections import namedtuple
from typing import Optional
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
def lowercase(value: Optional[str]) -> Optional[str]:
if value is None:
return None
else:
return value.lower()
def _make_key(relation) -> _ReferenceKey:
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
to keep track of quoting
"""
# databases and schemas can both be None
return _ReferenceKey(lowercase(relation.database),
lowercase(relation.schema),
lowercase(relation.identifier))
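`_make_key` lower-cases every component so cache lookups never have to reason about quoting or case. A tiny sketch with a stand-in relation type:
```
from collections import namedtuple
from typing import Optional

_ReferenceKey = namedtuple("_ReferenceKey", "database schema identifier")
FakeRelation = namedtuple("FakeRelation", "database schema identifier")


def lowercase(value: Optional[str]) -> Optional[str]:
    return None if value is None else value.lower()


def make_key(relation) -> _ReferenceKey:
    # databases and schemas can both be None
    return _ReferenceKey(lowercase(relation.database),
                         lowercase(relation.schema),
                         lowercase(relation.identifier))


print(make_key(FakeRelation("Analytics", "PUBLIC", "My_Table")))
# _ReferenceKey(database='analytics', schema='public', identifier='my_table')
```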

View File

@@ -10,8 +10,7 @@ from dbt.adapters.base import BaseConnectionManager
from dbt.contracts.connection import (
Connection, ConnectionState, AdapterResponse
)
from dbt.events.functions import fire_event
from dbt.events.types import ConnectionUsed, SQLQuery, SQLCommit, SQLQueryStatus
from dbt.logger import GLOBAL_LOGGER as logger
class SQLConnectionManager(BaseConnectionManager):
@@ -59,7 +58,9 @@ class SQLConnectionManager(BaseConnectionManager):
connection = self.get_thread_connection()
if auto_begin and connection.transaction_open is False:
self.begin()
fire_event(ConnectionUsed(conn_type=self.TYPE, conn_name=connection.name))
logger.debug('Using {} connection "{}".'
.format(self.TYPE, connection.name))
with self.exception_handler(sql):
if abridge_sql_log:
@@ -67,17 +68,19 @@ class SQLConnectionManager(BaseConnectionManager):
else:
log_sql = sql
fire_event(SQLQuery(conn_name=connection.name, sql=log_sql))
logger.debug(
'On {connection_name}: {sql}',
connection_name=connection.name,
sql=log_sql,
)
pre = time.time()
cursor = connection.handle.cursor()
cursor.execute(sql, bindings)
fire_event(
SQLQueryStatus(
status=str(self.get_response(cursor)),
elapsed=round((time.time() - pre), 2)
)
logger.debug(
"SQL status: {status} in {elapsed:0.2f} seconds",
status=self.get_response(cursor),
elapsed=(time.time() - pre)
)
return connection, cursor
@@ -157,7 +160,7 @@ class SQLConnectionManager(BaseConnectionManager):
'Tried to commit transaction on connection "{}", but '
'it does not have one open!'.format(connection.name))
fire_event(SQLCommit(conn_name=connection.name))
logger.debug('On {}: COMMIT'.format(connection.name))
self.add_commit_query()
connection.transaction_open = False
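The `add_query` hunk above brackets cursor execution with a "query used / query status" pair, abridging very long SQL before logging it and recording elapsed time. A sketch against stdlib `sqlite3` instead of a real adapter connection, with an assumed 512-character cutoff:
```
import sqlite3
import time

conn = sqlite3.connect(":memory:")
sql = "SELECT 1 AS answer"

# Abridge very long statements before logging them.
log_sql = sql if len(sql) <= 512 else sql[:512] + "..."
print(f"On sketch-conn: {log_sql}")

pre = time.time()
cursor = conn.execute(sql)
rows = cursor.fetchall()
print(f"SQL status: OK {len(rows)} in {round(time.time() - pre, 2)} seconds")
```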

View File

@@ -5,11 +5,8 @@ import dbt.clients.agate_helper
from dbt.contracts.connection import Connection
import dbt.exceptions
from dbt.adapters.base import BaseAdapter, available
from dbt.adapters.cache import _make_key
from dbt.adapters.sql import SQLConnectionManager
from dbt.events.functions import fire_event
from dbt.events.types import ColTypeChange, SchemaCreation, SchemaDrop
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.adapters.base.relation import BaseRelation
@@ -119,13 +116,8 @@ class SQLAdapter(BaseAdapter):
target_column.can_expand_to(reference_column):
col_string_size = reference_column.string_size()
new_type = self.Column.string_type(col_string_size)
fire_event(
ColTypeChange(
orig_type=target_column.data_type,
new_type=new_type,
table=current,
)
)
logger.debug("Changing col type from {} to {} in table {}",
target_column.data_type, new_type, current)
self.alter_column_type(current, column_name, new_type)
@@ -183,7 +175,7 @@ class SQLAdapter(BaseAdapter):
def create_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaCreation(relation=_make_key(relation)))
logger.debug('Creating schema "{}"', relation)
kwargs = {
'relation': relation,
}
@@ -194,7 +186,7 @@ class SQLAdapter(BaseAdapter):
def drop_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaDrop(relation=_make_key(relation)))
logger.debug('Dropping schema "{}".', relation)
kwargs = {
'relation': relation,
}
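The column-expansion hunk above compares string sizes and widens the target column when the reference column no longer fits. A standalone sketch of that comparison, with simple helpers standing in for dbt's `Column` class:
```
def string_type(size: int) -> str:
    return f"character varying({size})"


def can_expand_to(target_size: int, reference_size: int) -> bool:
    # Only ever widen a varchar column; never shrink it.
    return reference_size > target_size


target_size, reference_size = 64, 256
if can_expand_to(target_size, reference_size):
    new_type = string_type(reference_size)
    # A real adapter would now issue ALTER TABLE ... ALTER COLUMN ... TYPE.
    print(f"Changing col type from {string_type(target_size)} to {new_type}")
```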

View File

@@ -17,16 +17,6 @@ def _is_commit(revision: str) -> bool:
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))
def _raise_git_cloning_error(repo, revision, error):
stderr = error.stderr.decode('utf-8').strip()
if 'usage: git' in stderr:
stderr = stderr.split('\nusage: git')[0]
if re.match("fatal: destination path '(.+)' already exists", stderr):
raise error
dbt.exceptions.bad_package_spec(repo, revision, stderr)
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirectory=None):
has_revision = revision is not None
is_commit = _is_commit(revision or "")
@@ -51,18 +41,10 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirec
if dirname is not None:
clone_cmd.append(dirname)
try:
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
except dbt.exceptions.CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
if subdirectory:
cwd_subdir = os.path.join(cwd, dirname or '')
clone_cmd_subdir = ['git', 'sparse-checkout', 'set', subdirectory]
try:
run_cmd(cwd_subdir, clone_cmd_subdir)
except dbt.exceptions.CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
run_cmd(os.path.join(cwd, dirname or ''), ['git', 'sparse-checkout', 'set', subdirectory])
if remove_git_dir:
rmdir(os.path.join(dirname, '.git'))
@@ -134,10 +116,7 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
except dbt.exceptions.CommandResultError as exc:
err = exc.stderr.decode('utf-8')
exists = re.match("fatal: destination path '(.+)' already exists", err)
if not exists:
print(
'\nSomething went wrong while cloning {}'.format(repo) +
'\nCheck the debug logs for more information')
if not exists: # something else is wrong, raise it
raise
directory = None
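The removed `_raise_git_cloning_error` helper trimmed git's usage banner off stderr and let an "already exists" clone pass through untouched. A sketch of that stderr handling as a pure function, exercised on a canned error string instead of a real `git clone`:
```
import re


class BadPackageSpec(Exception):
    pass


def interpret_git_stderr(repo: str, stderr: str) -> None:
    stderr = stderr.strip()
    if "usage: git" in stderr:
        # Drop the long usage banner; keep only the actual error line.
        stderr = stderr.split("\nusage: git")[0]
    if re.match("fatal: destination path '(.+)' already exists", stderr):
        # Not a bad package spec: the caller handles an existing checkout itself.
        raise RuntimeError(stderr)
    raise BadPackageSpec(f"Error checking out spec='{repo}': {stderr}")


try:
    interpret_git_stderr(
        "https://example.com/repo.git",
        "fatal: Remote branch nope not found in upstream origin\nusage: git clone [...]",
    )
except BadPackageSpec as err:
    print(err)
```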

View File

@@ -21,7 +21,7 @@ import jinja2.sandbox
from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
get_test_macro_name, deep_map_render
get_test_macro_name, deep_map
)
from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
@@ -660,7 +660,5 @@ def add_rendered_test_kwargs(
return value
# The test_metadata.kwargs come from the test builder, and were set
# when the test node was created in _parse_generic_test.
kwargs = deep_map_render(_convert_function, node.test_metadata.kwargs)
kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
context[GENERIC_TEST_KWARGS_NAME] = kwargs
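`add_rendered_test_kwargs` walks the nested `test_metadata.kwargs` structure and renders each leaf value. A generic sketch of that kind of deep map over dicts and lists (not the actual `dbt.utils.deep_map_render` implementation):
```
from typing import Any, Callable, Tuple, Union

Keypath = Tuple[Union[str, int], ...]


def deep_map_sketch(func: Callable[[Any, Keypath], Any], value: Any,
                    keypath: Keypath = ()) -> Any:
    # Recurse through dicts and lists, applying func to every leaf value
    # along with the keypath where it was found.
    if isinstance(value, dict):
        return {k: deep_map_sketch(func, v, keypath + (k,)) for k, v in value.items()}
    if isinstance(value, list):
        return [deep_map_sketch(func, v, keypath + (i,)) for i, v in enumerate(value)]
    return func(value, keypath)


kwargs = {"column_name": "id", "values": ["a", "b"]}
rendered = deep_map_sketch(
    lambda v, path: f"rendered({v})" if isinstance(v, str) else v, kwargs
)
print(rendered)  # {'column_name': 'rendered(id)', 'values': ['rendered(a)', 'rendered(b)']}
```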

View File

@@ -33,10 +33,6 @@ def _get(path, registry_base_url=None):
resp = requests.get(url, timeout=30)
fire_event(RegistryProgressGETResponse(url=url, resp_code=resp.status_code))
resp.raise_for_status()
if resp is None:
raise requests.exceptions.ContentDecodingError(
'Request error: The response is None', response=resp
)
return resp.json()

View File

@@ -441,7 +441,7 @@ def run_cmd(
fire_event(SystemStdErrMsg(bmsg=err))
if proc.returncode != 0:
fire_event(SystemReportReturnCode(returncode=proc.returncode))
fire_event(SystemReportReturnCode(code=proc.returncode))
raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
out, err)

View File

@@ -9,7 +9,7 @@ from dbt import flags
from dbt.adapters.factory import get_adapter
from dbt.clients import jinja
from dbt.clients.system import make_directory
from dbt.context.providers import generate_runtime_model_context
from dbt.context.providers import generate_runtime_model
from dbt.contracts.graph.manifest import Manifest, UniqueID
from dbt.contracts.graph.compiled import (
COMPILED_TYPES,
@@ -26,10 +26,9 @@ from dbt.exceptions import (
RuntimeException,
)
from dbt.graph import Graph
from dbt.events.functions import fire_event
from dbt.events.types import FoundStats, CompilingNode, WritingInjectedSQLForNode
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.events.format import pluralize
from dbt.utils import pluralize
import dbt.tracking
graph_file_name = 'graph.gpickle'
@@ -54,7 +53,6 @@ def print_compile_stats(stats):
NodeType.Seed: 'seed file',
NodeType.Source: 'source',
NodeType.Exposure: 'exposure',
NodeType.Metric: 'metric'
}
results = {k: 0 for k in names.keys()}
@@ -70,7 +68,7 @@ def print_compile_stats(stats):
if t in names
])
fire_event(FoundStats(stat_line=stat_line))
logger.info("Found {}".format(stat_line))
def _node_enabled(node: ManifestNode):
@@ -91,8 +89,6 @@ def _generate_stats(manifest: Manifest):
stats[source.resource_type] += 1
for exposure in manifest.exposures.values():
stats[exposure.resource_type] += 1
for metric in manifest.metrics.values():
stats[metric.resource_type] += 1
for macro in manifest.macros.values():
stats[macro.resource_type] += 1
return stats
@@ -182,7 +178,7 @@ class Compiler:
extra_context: Dict[str, Any],
) -> Dict[str, Any]:
context = generate_runtime_model_context(
context = generate_runtime_model(
node, self.config, manifest
)
context.update(extra_context)
@@ -370,7 +366,7 @@ class Compiler:
if extra_context is None:
extra_context = {}
fire_event(CompilingNode(unique_id=node.unique_id))
logger.debug("Compiling {}".format(node.unique_id))
data = node.to_dict(omit_none=True)
data.update({
@@ -422,44 +418,42 @@ class Compiler:
else:
dependency_not_found(node, dependency)
def link_graph(self, linker: Linker, manifest: Manifest, add_test_edges: bool = False):
def link_graph(self, linker: Linker, manifest: Manifest):
for source in manifest.sources.values():
linker.add_node(source.unique_id)
for node in manifest.nodes.values():
self.link_node(linker, node, manifest)
for exposure in manifest.exposures.values():
self.link_node(linker, exposure, manifest)
for metric in manifest.metrics.values():
self.link_node(linker, metric, manifest)
cycle = linker.find_cycles()
if cycle:
raise RuntimeError("Found a cycle: {}".format(cycle))
if add_test_edges:
manifest.build_parent_and_child_maps()
self.add_test_edges(linker, manifest)
manifest.build_parent_and_child_maps()
def add_test_edges(self, linker: Linker, manifest: Manifest) -> None:
self.resolve_graph(linker, manifest)
def resolve_graph(self, linker: Linker, manifest: Manifest) -> None:
""" This method adds additional edges to the DAG. For a given non-test
executable node, add an edge from an upstream test to the given node if
the set of nodes the test depends on is a subset of the upstream nodes
for the given node. """
the set of nodes the test depends on is a proper/strict subset of the
upstream nodes for the given node. """
# Given a graph:
# model1 --> model2 --> model3
# | |
# | \/
# \/ test 2
# | |
# | \/
# \/ test 2
# test1
#
# Produce the following graph:
# model1 --> model2 --> model3
# | /\ | /\ /\
# | | \/ | |
# \/ | test2 ----| |
# test1 ----|---------------|
# | | /\ /\
# | \/ | |
# \/ test2 ------- |
# test1 -------------------
for node_id in linker.graph:
# If node is executable (in manifest.nodes) and does _not_
@@ -497,19 +491,21 @@ class Compiler:
)
# If the set of nodes that an upstream test depends on
# is a subset of all upstream nodes of the current node,
# add an edge from the upstream test to the current node.
if (test_depends_on.issubset(upstream_nodes)):
# is a proper (or strict) subset of all upstream nodes of
# the current node, add an edge from the upstream test
# to the current node. Must be a proper/strict subset to
# avoid adding a circular dependency to the graph.
if (test_depends_on < upstream_nodes):
linker.graph.add_edge(
upstream_test,
node_id
)
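The subset check above is the heart of the test-edge pass: an upstream test is sequenced before a node only when everything that test depends on is strictly upstream of that node, and requiring a proper subset keeps the new edge from closing a cycle. A plain-set toy of that rule (dbt's real implementation also walks the graph to collect the upstream tests):
```
# transitive upstream (ancestor) sets for each executable node
upstream = {
    "model1": set(),
    "model2": {"model1"},
    "model3": {"model1", "model2"},
}
# what each generic test depends on
tests = {
    "test1": {"model1"},
    "test2": {"model2"},
}

extra_edges = []
for node_id, upstream_nodes in upstream.items():
    for test_id, test_depends_on in tests.items():
        # proper/strict subset: the test's dependencies must all be upstream
        # and must not exactly cover the node's upstream set
        if test_depends_on < upstream_nodes:
            extra_edges.append((test_id, node_id))

print(extra_edges)  # [('test1', 'model3'), ('test2', 'model3')]
```
With this toy data both tests end up ahead of `model3`, which builds on the tested models, while neither gains an edge to a node whose upstream set it exactly covers.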
def compile(self, manifest: Manifest, write=True, add_test_edges=False) -> Graph:
def compile(self, manifest: Manifest, write=True) -> Graph:
self.initialize()
linker = Linker()
self.link_graph(linker, manifest, add_test_edges)
self.link_graph(linker, manifest)
stats = _generate_stats(manifest)
@@ -524,7 +520,7 @@ class Compiler:
if (not node.extra_ctes_injected or
node.resource_type == NodeType.Snapshot):
return node
fire_event(WritingInjectedSQLForNode(unique_id=node.unique_id))
logger.debug(f'Writing injected SQL for node "{node.unique_id}"')
if node.compiled_sql:
node.compiled_path = node.write_node(

View File

@@ -15,8 +15,7 @@ from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.exceptions import RuntimeException
from dbt.exceptions import validator_error_message
from dbt.events.types import MissingProfileTarget
from dbt.events.functions import fire_event
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import coerce_dict_str
from .renderer import ProfileRenderer
@@ -92,7 +91,6 @@ class Profile(HasCredentials):
user_config: UserConfig
threads: int
credentials: Credentials
profile_env_vars: Dict[str, Any]
def __init__(
self,
@@ -110,7 +108,6 @@ class Profile(HasCredentials):
self.user_config = user_config
self.threads = threads
self.credentials = credentials
self.profile_env_vars = {} # never available on init
def to_profile_info(
self, serialize_credentials: bool = False
@@ -294,7 +291,10 @@ class Profile(HasCredentials):
target_name = renderer.render_value(raw_profile['target'])
else:
target_name = 'default'
fire_event(MissingProfileTarget(profile_name=profile_name, target_name=target_name))
logger.debug(
"target not specified in profile '{}', using '{}'"
.format(profile_name, target_name)
)
raw_profile_data = cls._get_profile_data(
raw_profile, profile_name, target_name

View File

@@ -284,7 +284,6 @@ class PartialProject(RenderComponents):
selectors_dict=rendered_selectors,
)
# Called by 'collect_parts' in RuntimeConfig
def render(self, renderer: DbtProjectYamlRenderer) -> 'Project':
try:
rendered = self.get_rendered(renderer)
@@ -305,7 +304,7 @@ class PartialProject(RenderComponents):
)
raise DbtProjectError(msg.format(deprecated_path=deprecated_path,
exp_path=exp_path))
deprecations.warn(f'project-config-{deprecated_path}',
deprecations.warn('project_config_path',
deprecated_path=deprecated_path,
exp_path=exp_path)
@@ -398,8 +397,6 @@ class PartialProject(RenderComponents):
vars_dict = cfg.vars
vars_value = VarProvider(vars_dict)
# There will never be any project_env_vars when it's first created
project_env_vars: Dict[str, Any] = {}
on_run_start: List[str] = value_or(cfg.on_run_start, [])
on_run_end: List[str] = value_or(cfg.on_run_end, [])
@@ -447,7 +444,6 @@ class PartialProject(RenderComponents):
vars=vars_value,
config_version=cfg.config_version,
unrendered=unrendered,
project_env_vars=project_env_vars,
)
# sanity check - this means an internal issue
project.validate()
@@ -560,7 +556,6 @@ class Project:
query_comment: QueryComment
config_version: int
unrendered: RenderComponents
project_env_vars: Dict[str, Any]
@property
def all_source_paths(self) -> List[str]:
@@ -569,13 +564,6 @@ class Project:
self.analysis_paths, self.macro_paths
)
@property
def generic_test_paths(self):
generic_test_paths = []
for test_path in self.test_paths:
generic_test_paths.append(os.path.join(test_path, 'generic'))
return generic_test_paths
def __str__(self):
cfg = self.to_project_config(with_packages=True)
return str(cfg)
@@ -650,6 +638,26 @@ class Project:
verify_version=verify_version,
)
@classmethod
def render_from_dict(
cls,
project_root: str,
project_dict: Dict[str, Any],
packages_dict: Dict[str, Any],
selectors_dict: Dict[str, Any],
renderer: DbtProjectYamlRenderer,
*,
verify_version: bool = False
) -> 'Project':
partial = PartialProject.from_dicts(
project_root=project_root,
project_dict=project_dict,
packages_dict=packages_dict,
selectors_dict=selectors_dict,
verify_version=verify_version,
)
return partial.render(renderer)
@classmethod
def from_project_root(
cls,

View File

@@ -1,14 +1,12 @@
from typing import Dict, Any, Tuple, Optional, Union, Callable
from dbt.clients.jinja import get_rendered, catch_jinja
from dbt.context.target import TargetContext
from dbt.context.secret import SecretContext
from dbt.context.base import BaseContext
from dbt.contracts.connection import HasCredentials
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
from dbt.utils import deep_map_render
from dbt.node_types import NodeType
from dbt.utils import deep_map
Keypath = Tuple[Union[str, int], ...]
@@ -49,7 +47,7 @@ class BaseRenderer:
self, data: Dict[str, Any]
) -> Dict[str, Any]:
try:
return deep_map_render(self.render_entry, data)
return deep_map(self.render_entry, data)
except RecursionException:
raise DbtProjectError(
f'Cycle detected: {self.name} input has a reference to itself',
@@ -101,23 +99,6 @@ class ProjectPostprocessor(Dict[Keypath, Callable[[Any], Any]]):
class DbtProjectYamlRenderer(BaseRenderer):
_KEYPATH_HANDLERS = ProjectPostprocessor()
def __init__(
self, profile: Optional[HasCredentials] = None,
cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars. This is almost always
# a TargetContext, but in the debug task we want a project
# even when we don't have a profile.
if cli_vars is None:
cli_vars = {}
if profile:
self.ctx_obj = TargetContext(profile, cli_vars)
else:
self.ctx_obj = BaseContext(cli_vars) # type:ignore
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
'Project config'
@@ -176,36 +157,82 @@ class DbtProjectYamlRenderer(BaseRenderer):
return True
class ProfileRenderer(BaseRenderer):
@property
def name(self):
'Profile'
class SchemaYamlRenderer(BaseRenderer):
DOCUMENTABLE_NODES = frozenset(
n.pluralize() for n in NodeType.documentable()
)
@property
def name(self):
return 'Rendering yaml'
def _is_norender_key(self, keypath: Keypath) -> bool:
"""
models:
- name: blah
- description: blah
tests: ...
- columns:
- name:
- description: blah
tests: ...
Return True if it's tests or description - those aren't rendered
"""
if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
return True
if (
len(keypath) >= 4 and
keypath[1] == 'columns' and
keypath[3] in ('tests', 'description')
):
return True
return False
# don't render descriptions or test keyword arguments
def should_render_keypath(self, keypath: Keypath) -> bool:
if len(keypath) < 2:
return True
if keypath[0] not in self.DOCUMENTABLE_NODES:
return True
if len(keypath) < 3:
return True
if keypath[0] == NodeType.Source.pluralize():
if keypath[2] == 'description':
return False
if keypath[2] == 'tables':
if self._is_norender_key(keypath[3:]):
return False
elif keypath[0] == NodeType.Macro.pluralize():
if keypath[2] == 'arguments':
if self._is_norender_key(keypath[3:]):
return False
elif self._is_norender_key(keypath[1:]):
return False
else: # keypath[0] in self.DOCUMENTABLE_NODES:
if self._is_norender_key(keypath[1:]):
return False
return True
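`should_render_keypath` above decides which YAML values pass through Jinja: descriptions and test definitions under documentable nodes are left alone. A condensed sketch of that rule set (it skips the special source `tables` and macro `arguments` nesting) with a few example keypaths:
```
DOCUMENTABLE = {"models", "seeds", "snapshots", "sources", "analyses", "macros", "exposures"}


def is_norender_key(keypath) -> bool:
    # descriptions and test definitions are not rendered
    if len(keypath) >= 2 and keypath[1] in ("tests", "description"):
        return True
    if len(keypath) >= 4 and keypath[1] == "columns" and keypath[3] in ("tests", "description"):
        return True
    return False


def should_render_keypath(keypath) -> bool:
    if len(keypath) < 2 or keypath[0] not in DOCUMENTABLE:
        return True
    return not is_norender_key(keypath[1:])


assert should_render_keypath(("vars", "start_date"))
assert should_render_keypath(("models", 0, "name"))
assert not should_render_keypath(("models", 0, "description"))
assert not should_render_keypath(("models", 0, "columns", 0, "tests"))
print("keypath rules behave as expected")
```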
class PackageRenderer(BaseRenderer):
@property
def name(self):
return 'Packages config'
class SelectorRenderer(BaseRenderer):
@property
def name(self):
return 'Selector config'
class SecretRenderer(BaseRenderer):
def __init__(
self, cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars.
if cli_vars is None:
cli_vars = {}
self.ctx_obj = SecretContext(cli_vars)
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
return 'Secret'
class ProfileRenderer(SecretRenderer):
@property
def name(self):
return 'Profile'
class PackageRenderer(SecretRenderer):
@property
def name(self):
return 'Packages config'

View File

@@ -16,11 +16,12 @@ from dbt import flags
from dbt import tracking
from dbt.adapters.factory import get_relation_class_by_name, get_include_paths
from dbt.helper_types import FQNPath, PathSet
from dbt.context.base import generate_base_context
from dbt.context.target import generate_target_context
from dbt.contracts.connection import AdapterRequiredConfig, Credentials
from dbt.contracts.graph.manifest import ManifestMetadata
from dbt.contracts.relation import ComponentName
from dbt.events.types import ProfileLoadError, ProfileNotFound
from dbt.events.functions import fire_event
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.ui import warning_tag
from dbt.contracts.project import Configuration, UserConfig
@@ -59,7 +60,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def __post_init__(self):
self.validate()
# Called by 'new_project' and 'from_args'
@classmethod
def from_parts(
cls,
@@ -116,8 +116,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name=profile.profile_name,
target_name=profile.target_name,
user_config=profile.user_config,
@@ -128,7 +126,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
dependencies=dependencies,
)
# Called by 'load_projects' in this class
def new_project(self, project_root: str) -> 'RuntimeConfig':
"""Given a new project root, read in its project dictionary, supply the
existing project's profile info, and create a new project file.
@@ -143,7 +140,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
profile.validate()
# load the new project and its packages. Don't pass cli variables.
renderer = DbtProjectYamlRenderer(profile)
renderer = DbtProjectYamlRenderer(generate_target_context(profile, {}))
project = Project.from_project_root(
project_root,
@@ -151,14 +148,14 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
verify_version=bool(flags.VERSION_CHECK),
)
runtime_config = self.from_parts(
cfg = self.from_parts(
project=project,
profile=profile,
args=deepcopy(self.args),
)
# force our quoting back onto the new project.
runtime_config.quoting = deepcopy(self.quoting)
return runtime_config
cfg.quoting = deepcopy(self.quoting)
return cfg
def serialize(self) -> Dict[str, Any]:
"""Serialize the full configuration to a single dictionary. For any
@@ -208,26 +205,21 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
)
# build the profile using the base renderer and the one fact we know
# Note: only the named profile section is rendered. The rest of the
# profile is ignored.
cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}'))
profile_renderer = ProfileRenderer(cli_vars)
profile_renderer = ProfileRenderer(generate_base_context(cli_vars))
profile_name = partial.render_profile_name(profile_renderer)
profile = cls._get_rendered_profile(
args, profile_renderer, profile_name
)
# Save env_vars encountered in rendering for partial parsing
profile.profile_env_vars = profile_renderer.ctx_obj.env_vars
# get a new renderer using our target information and render the
# project
project_renderer = DbtProjectYamlRenderer(profile, cli_vars)
ctx = generate_target_context(profile, cli_vars)
project_renderer = DbtProjectYamlRenderer(ctx)
project = partial.render(project_renderer)
# Save env_vars encountered in rendering for partial parsing
project.project_env_vars = project_renderer.ctx_obj.env_vars
return (project, profile)
# Called in main.py, lib.py, task/base.py
@classmethod
def from_args(cls, args: Any) -> 'RuntimeConfig':
"""Given arguments, read in dbt_project.yml from the current directory,
@@ -261,7 +253,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
) -> PathSet:
for key, value in config.items():
if isinstance(value, dict) and not key.startswith('+'):
self._get_config_paths(value, path + (key,), paths)
self._get_v2_config_paths(value, path + (key,), paths)
else:
paths.add(path)
return frozenset(paths)
@@ -368,7 +360,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def clear_dependencies(self):
self.dependencies = None
# Called by 'load_dependencies' in this class
def load_projects(
self, paths: Iterable[Path]
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
@@ -521,8 +512,6 @@ class UnsetProfileConfig(RuntimeConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name='',
target_name='',
user_config=UnsetConfig(),
@@ -545,12 +534,13 @@ class UnsetProfileConfig(RuntimeConfig):
args, profile_renderer, profile_name
)
except (DbtProjectError, DbtProfileError) as exc:
selected_profile_name = Profile.pick_profile_name(
args_profile_name=getattr(args, 'profile', None),
project_profile_name=profile_name
logger.debug(
'Profile not loaded due to error: {}', exc, exc_info=True
)
logger.info(
'No profile "{}" found, continuing with no target',
profile_name
)
fire_event(ProfileLoadError(exc=exc))
fire_event(ProfileNotFound(profile_name=selected_profile_name))
# return the poisoned form
profile = UnsetProfile()
# disable anonymous usage statistics

View File

@@ -1,9 +1,8 @@
from typing import Dict, Any
from dbt.clients import yaml_helper
from dbt.events.functions import fire_event
from dbt.exceptions import raise_compiler_error, ValidationException
from dbt.events.types import InvalidVarsYAML
from dbt.logger import GLOBAL_LOGGER as logger
def parse_cli_vars(var_string: str) -> Dict[str, Any]:
@@ -18,5 +17,7 @@ def parse_cli_vars(var_string: str) -> Dict[str, Any]:
"The --vars argument must be a YAML dictionary, but was "
"of type '{}'".format(type_name))
except ValidationException:
fire_event(InvalidVarsYAML())
logger.error(
"The YAML provided in the --vars argument is not valid.\n"
)
raise
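`parse_cli_vars` turns the `--vars` string into a dictionary and complains when the YAML parses to anything else. A sketch using PyYAML directly (dbt's own `yaml_helper` wraps it):
```
from typing import Any, Dict

import yaml


def parse_cli_vars_sketch(var_string: str) -> Dict[str, Any]:
    try:
        parsed = yaml.safe_load(var_string)
    except yaml.YAMLError:
        print("The YAML provided in the --vars argument is not valid.")
        raise
    if not isinstance(parsed, dict):
        raise ValueError(
            "The --vars argument must be a YAML dictionary, but was "
            f"of type '{type(parsed).__name__}'"
        )
    return parsed


print(parse_cli_vars_sketch('{"start_date": "2021-11-01", "full_refresh": false}'))
```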

View File

@@ -6,16 +6,13 @@ from typing import (
from dbt import flags
from dbt import tracking
from dbt.clients.jinja import get_rendered
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import (
raise_compiler_error, MacroReturn, raise_parsing_error, disallow_secret_env_var
)
from dbt.logger import SECRET_ENV_PREFIX
from dbt.events.functions import fire_event, get_invocation_id
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.events.functions import fire_event
from dbt.events.types import MacroEventInfo, MacroEventDebug
from dbt.version import __version__ as dbt_version
@@ -25,39 +22,6 @@ import pytz
import datetime
import re
# Contexts in dbt Core
# Contexts are used for Jinja rendering. They include context methods,
# executable macros, and various settings that are available in Jinja.
#
# Different contexts are used in different places because we allow access
# to different methods and data in different places. Executable SQL, for
# example, includes the available macros and the model, while Jinja in
# yaml files is more limited.
#
# The context that is passed to Jinja is always in a dictionary format,
# not an actual class, so a 'to_dict()' is executed on a context class
# before it is used for rendering.
#
# Each context has a generate_<name>_context function to create the context.
# ProviderContext subclasses have different generate functions for
# parsing and for execution.
#
# Context class hierarchy
#
# BaseContext -- core/dbt/context/base.py
# SecretContext -- core/dbt/context/secret.py
# TargetContext -- core/dbt/context/target.py
# ConfiguredContext -- core/dbt/context/configured.py
# SchemaYamlContext -- core/dbt/context/configured.py
# DocsRuntimeContext -- core/dbt/context/configured.py
# MacroResolvingContext -- core/dbt/context/configured.py
# ManifestContext -- core/dbt/context/manifest.py
# QueryHeaderContext -- core/dbt/context/manifest.py
# ProviderContext -- core/dbt/context/provider.py
# MacroContext -- core/dbt/context/provider.py
# ModelContext -- core/dbt/context/provider.py
# TestContext -- core/dbt/context/provider.py
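The comment block above describes the context hierarchy: every context class ultimately becomes a plain dictionary via `to_dict()` before Jinja ever sees it. A minimal sketch of that flow with `jinja2`, using a toy context rather than any of the real classes:
```
import os
from typing import Any, Dict

from jinja2 import Template


class SketchContext:
    """Toy context exposing a couple of members the way to_dict() would."""

    def __init__(self, cli_vars: Dict[str, Any]) -> None:
        self.cli_vars = cli_vars

    def env_var(self, var: str, default: str = "") -> str:
        return os.environ.get(var, default)

    def to_dict(self) -> Dict[str, Any]:
        # Jinja only ever receives this dictionary, never the class itself.
        return {"var": lambda name: self.cli_vars.get(name), "env_var": self.env_var}


ctx = SketchContext({"schema_prefix": "dev"}).to_dict()
template = "select * from {{ var('schema_prefix') }}_{{ env_var('USER', 'dbt') }}.events"
print(Template(template).render(**ctx))
```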
def get_pytz_module_context() -> Dict[str, Any]:
context_exports = pytz.__all__ # type: ignore
@@ -197,11 +161,9 @@ class Var:
class BaseContext(metaclass=ContextMeta):
# subclass is TargetContext
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
self.env_vars = {}
def generate_builtins(self):
builtins: Dict[str, Any] = {}
@@ -310,26 +272,20 @@ class BaseContext(metaclass=ContextMeta):
return Var(self._ctx, self.cli_vars)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
@staticmethod
def env_var(var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
return os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
self.env_vars[var] = return_value
return return_value
return default
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
undefined_error(msg)
if os.environ.get('DBT_MACRO_DEBUGGING'):
@contextmember
@@ -526,7 +482,10 @@ class BaseContext(metaclass=ContextMeta):
"""invocation_id outputs a UUID generated for this dbt run (useful for
auditing)
"""
return get_invocation_id()
if tracking.active_user is not None:
return tracking.active_user.invocation_id
else:
return None
@contextproperty
def modules(self) -> Dict[str, Any]:

View File

@@ -1,18 +1,14 @@
import os
from typing import Any, Dict, Optional
from typing import Any, Dict
from dbt.contracts.connection import AdapterRequiredConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.node_types import NodeType
from dbt.utils import MultiDict
from dbt.context.base import contextproperty, contextmember, Var
from dbt.context.base import contextproperty, Var
from dbt.context.target import TargetContext
from dbt.exceptions import raise_parsing_error, disallow_secret_env_var
class ConfiguredContext(TargetContext):
# subclasses are SchemaYamlContext, MacroResolvingContext, ManifestContext
config: AdapterRequiredConfig
def __init__(
@@ -67,18 +63,10 @@ class ConfiguredVar(Var):
return self.get_missing_var(var_name)
class SchemaYamlVars():
def __init__(self):
self.env_vars = {}
self.vars = {}
class SchemaYamlContext(ConfiguredContext):
# subclass is DocsRuntimeContext
def __init__(self, config, project_name: str, schema_yaml_vars: Optional[SchemaYamlVars]):
def __init__(self, config, project_name: str):
super().__init__(config)
self._project_name = project_name
self.schema_yaml_vars = schema_yaml_vars
@contextproperty
def var(self) -> ConfiguredVar:
@@ -86,24 +74,6 @@ class SchemaYamlContext(ConfiguredContext):
self._ctx, self.config, self._project_name
)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
if self.schema_yaml_vars:
self.schema_yaml_vars.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
@@ -116,10 +86,10 @@ class MacroResolvingContext(ConfiguredContext):
)
def generate_schema_yml_context(
config: AdapterRequiredConfig, project_name: str, schema_yaml_vars: SchemaYamlVars = None
def generate_schema_yml(
config: AdapterRequiredConfig, project_name: str
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name, schema_yaml_vars)
ctx = SchemaYamlContext(config, project_name)
return ctx.to_dict()

View File

@@ -23,7 +23,7 @@ class DocsRuntimeContext(SchemaYamlContext):
manifest: Manifest,
current_project: str,
) -> None:
super().__init__(config, current_project, None)
super().__init__(config, current_project)
self.node = node
self.manifest = manifest
@@ -75,7 +75,7 @@ class DocsRuntimeContext(SchemaYamlContext):
return target_doc.block_contents
def generate_runtime_docs_context(
def generate_runtime_docs(
config: RuntimeConfig,
target: Any,
manifest: Manifest,

View File

@@ -17,7 +17,6 @@ class ManifestContext(ConfiguredContext):
The given macros can override any previous context values, which will be
available as if they were accessed relative to the package name.
"""
# subclasses are QueryHeaderContext and ProviderContext
def __init__(
self,
config: AdapterRequiredConfig,

View File

@@ -16,7 +16,6 @@ from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
@@ -32,13 +31,11 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.parsed import (
ParsedMacro,
ParsedExposure,
ParsedMetric,
ParsedSeedNode,
ParsedSourceDefinition,
)
from dbt.exceptions import (
CompilationException,
ParsingException,
InternalException,
ValidationException,
RuntimeException,
@@ -50,8 +47,6 @@ from dbt.exceptions import (
ref_bad_context,
source_target_not_found,
wrapped_exports,
raise_parsing_error,
disallow_secret_env_var,
)
from dbt.config import IsFQNResource
from dbt.node_types import NodeType
@@ -327,7 +322,7 @@ class ParseConfigObject(Config):
def require(self, name, validator=None):
return ''
def get(self, name, default=None, validator=None):
def get(self, name, validator=None, default=None):
return ''
def persist_relation_docs(self) -> bool:
@@ -371,7 +366,7 @@ class RuntimeConfigObject(Config):
return to_return
def get(self, name, default=None, validator=None):
def get(self, name, validator=None, default=None):
to_return = self._lookup(name, default)
if validator is not None and default is not None:
@@ -640,7 +635,6 @@ T = TypeVar('T')
# Base context collection, used for parsing configs.
class ProviderContext(ManifestContext):
# subclasses are MacroContext, ModelContext, TestContext
def __init__(
self,
model,
@@ -1166,35 +1160,6 @@ class ProviderContext(ManifestContext):
)
raise CompilationException(msg)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file.
# If this is compiling, do not save because it's irrelevant to parsing.
if self.model and not hasattr(self.model, 'compiled'):
self.manifest.env_vars[var] = return_value
source_file = self.manifest.files[self.model.file_id]
# Schema files should never get here
if source_file.parse_file_type != 'schema':
source_file.env_vars.append(var)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
class MacroContext(ProviderContext):
"""Internally, macros can be executed like nodes, with some restrictions:
@@ -1296,7 +1261,7 @@ class ModelContext(ProviderContext):
# This is called by '_context_for', used in 'render_with_context'
def generate_parser_model_context(
def generate_parser_model(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1313,7 +1278,7 @@ def generate_parser_model_context(
return ctx.to_dict()
def generate_generate_name_macro_context(
def generate_generate_component_name_macro(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1324,7 +1289,7 @@ def generate_generate_name_macro_context(
return ctx.to_dict()
def generate_runtime_model_context(
def generate_runtime_model(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1335,7 +1300,7 @@ def generate_runtime_model_context(
return ctx.to_dict()
def generate_runtime_macro_context(
def generate_runtime_macro(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1389,44 +1354,6 @@ def generate_parse_exposure(
}
class MetricRefResolver(BaseResolver):
def __call__(self, *args) -> str:
package = None
if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
ref_invalid_args(self.model, args)
self.validate_args(name, package)
self.model.refs.append(list(args))
return ''
def validate_args(self, name, package):
if not isinstance(name, str):
raise ParsingException(
f'In a metrics section in {self.model.original_file_path} '
f'the name argument to ref() must be a string'
)
def generate_parse_metrics(
metric: ParsedMetric,
config: RuntimeConfig,
manifest: Manifest,
package_name: str,
) -> Dict[str, Any]:
project = config.load_dependencies()[package_name]
return {
'ref': MetricRefResolver(
None,
metric,
project,
manifest,
),
}
# This class is currently used by the schema parser in order
# to limit the number of macros in the context by using
# the TestMacroNamespace
@@ -1461,13 +1388,8 @@ class TestContext(ProviderContext):
# 'depends_on.macros' by using the TestMacroNamespace
def _build_test_namespace(self):
depends_on_macros = []
# all generic tests use a macro named 'get_where_subquery' to wrap 'model' arg
# see generic_test_builders.build_model_str
get_where_subquery = self.macro_resolver.macros_by_name.get('get_where_subquery')
if get_where_subquery:
depends_on_macros.append(get_where_subquery.unique_id)
if self.model.depends_on and self.model.depends_on.macros:
depends_on_macros.extend(self.model.depends_on.macros)
depends_on_macros = self.model.depends_on.macros
lookup_macros = depends_on_macros.copy()
for macro_unique_id in lookup_macros:
lookup_macro = self.macro_resolver.macros.get(macro_unique_id)
@@ -1480,30 +1402,6 @@ class TestContext(ProviderContext):
)
self.namespace = macro_namespace
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if self.model:
self.manifest.env_vars[var] = return_value
# the "model" should only be test nodes, but just in case, check
if self.model.resource_type == NodeType.Test and self.model.file_key_name:
source_file = self.manifest.files[self.model.file_id]
(yaml_key, name) = self.model.file_key_name.split('.')
source_file.add_env_var(var, yaml_key, name)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
def generate_test_context(
model: ManifestNode,

View File

@@ -1,40 +0,0 @@
import os
from typing import Any, Dict, Optional
from .base import BaseContext, contextmember
from dbt.exceptions import raise_parsing_error
class SecretContext(BaseContext):
"""This context is used in profiles.yml + packages.yml. It can render secret
env vars that aren't usable elsewhere"""
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
In this context *only*, env_var will return the actual values of
env vars prefixed with DBT_ENV_SECRET_
"""
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
def generate_secret_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
ctx = SecretContext(cli_vars)
# This is not a Mashumaro to_dict call
return ctx.to_dict()
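`SecretContext` above is the one place where `DBT_ENV_SECRET_`-prefixed variables may be resolved; the other contexts in this diff refuse that prefix and record every variable they do return so parsing can be invalidated when one changes. A standalone sketch of that shape (the exception and helper names here are assumptions, not dbt's real ones):
```
import os
from typing import Dict, Optional

SECRET_PREFIX = "DBT_ENV_SECRET_"


class ParsingError(Exception):
    pass


def env_var_sketch(used: Dict[str, str], var: str, default: Optional[str] = None,
                   allow_secrets: bool = False) -> str:
    if var.startswith(SECRET_PREFIX) and not allow_secrets:
        raise ParsingError(f"Secret env vars are only allowed in profiles.yml and packages.yml: '{var}'")
    value = os.environ.get(var, default)
    if value is None:
        raise ParsingError(f"Env var required but not provided: '{var}'")
    # Track what was used so a later run can detect when it changes.
    used[var] = value
    return value


used_vars: Dict[str, str] = {}
os.environ.setdefault("DBT_SKETCH_TARGET", "dev")
print(env_var_sketch(used_vars, "DBT_SKETCH_TARGET"), used_vars)
```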

View File

@@ -8,7 +8,6 @@ from dbt.context.base import (
class TargetContext(BaseContext):
# subclass is ConfiguredContext
def __init__(self, config: HasCredentials, cli_vars: Dict[str, Any]):
super().__init__(cli_vars=cli_vars)
self.config = config

View File

@@ -7,8 +7,7 @@ from typing import (
)
from dbt.exceptions import InternalException
from dbt.utils import translate_aliases
from dbt.events.functions import fire_event
from dbt.events.types import NewConnectionOpening
from dbt.logger import GLOBAL_LOGGER as logger
from typing_extensions import Protocol
from dbt.dataclass_schema import (
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
@@ -102,7 +101,10 @@ class LazyHandle:
self.opener = opener
def resolve(self, connection: Connection) -> Connection:
fire_event(NewConnectionOpening(connection_state=connection.state))
logger.debug(
'Opening a new connection, currently in state {}'
.format(connection.state)
)
return self.opener(connection)

View File

@@ -190,7 +190,6 @@ class SourceFile(BaseSourceFile):
nodes: List[str] = field(default_factory=list)
docs: List[str] = field(default_factory=list)
macros: List[str] = field(default_factory=list)
env_vars: List[str] = field(default_factory=list)
@classmethod
def big_seed(cls, path: FilePath) -> 'SourceFile':
@@ -223,7 +222,6 @@ class SchemaSourceFile(BaseSourceFile):
tests: Dict[str, Any] = field(default_factory=dict)
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.
@@ -232,7 +230,6 @@ class SchemaSourceFile(BaseSourceFile):
# Patches are only against external sources. Sources can be
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None
@@ -255,7 +252,7 @@ class SchemaSourceFile(BaseSourceFile):
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
# Remove partial parsing specific data
for key in ('pp_test_index', 'pp_dict'):
for key in ('pp_files', 'pp_test_index', 'pp_dict'):
if key in dct:
del dct[key]
return dct
@@ -302,21 +299,5 @@ class SchemaSourceFile(BaseSourceFile):
test_ids.extend(self.tests[key][name])
return test_ids
def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
if name not in self.env_vars[yaml_key]:
self.env_vars[yaml_key][name] = []
if var not in self.env_vars[yaml_key][name]:
self.env_vars[yaml_key][name].append(var)
def delete_from_env_vars(self, yaml_key, name):
# We delete all vars for this yaml_key/name because the
# entry has been scheduled for reparsing.
if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
del self.env_vars[yaml_key][name]
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]
AnySourceFile = Union[SchemaSourceFile, SourceFile]
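`add_env_var` and `delete_from_env_vars` above keep a `yaml_key -> name -> [vars]` index so an entry scheduled for reparsing drops all of its recorded env vars at once. A plain-dict sketch of that bookkeeping:
```
from typing import Dict, List

env_vars: Dict[str, Dict[str, List[str]]] = {}


def add_env_var(var: str, yaml_key: str, name: str) -> None:
    used = env_vars.setdefault(yaml_key, {}).setdefault(name, [])
    if var not in used:
        used.append(var)


def delete_from_env_vars(yaml_key: str, name: str) -> None:
    # Drop every var recorded for this entry; it is re-recorded on reparse.
    if yaml_key in env_vars and name in env_vars[yaml_key]:
        del env_vars[yaml_key][name]
        if not env_vars[yaml_key]:
            del env_vars[yaml_key]


add_env_var("DATABASE_HOST", "models", "my_model")
add_env_var("DATABASE_PORT", "models", "my_model")
print(env_vars)  # {'models': {'my_model': ['DATABASE_HOST', 'DATABASE_PORT']}}
delete_from_env_vars("models", "my_model")
print(env_vars)  # {}
```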

View File

@@ -6,10 +6,8 @@ from dbt.contracts.graph.parsed import (
ParsedHookNode,
ParsedModelNode,
ParsedExposure,
ParsedMetric,
ParsedResource,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -83,17 +81,11 @@ class CompiledModelNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class CompiledRPCNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class CompiledSqlNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
@dataclass
class CompiledSeedNode(CompiledNode):
# keep this in sync with ParsedSeedNode!
@@ -127,7 +119,6 @@ class CompiledGenericTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore
@@ -151,7 +142,6 @@ PARSED_TYPES: Dict[Type[CompiledNode], Type[ParsedResource]] = {
CompiledModelNode: ParsedModelNode,
CompiledHookNode: ParsedHookNode,
CompiledRPCNode: ParsedRPCNode,
CompiledSqlNode: ParsedSqlNode,
CompiledSeedNode: ParsedSeedNode,
CompiledSnapshotNode: ParsedSnapshotNode,
CompiledSingularTestNode: ParsedSingularTestNode,
@@ -164,7 +154,6 @@ COMPILED_TYPES: Dict[Type[ParsedResource], Type[CompiledNode]] = {
ParsedModelNode: CompiledModelNode,
ParsedHookNode: CompiledHookNode,
ParsedRPCNode: CompiledRPCNode,
ParsedSqlNode: CompiledSqlNode,
ParsedSeedNode: CompiledSeedNode,
ParsedSnapshotNode: CompiledSnapshotNode,
ParsedSingularTestNode: CompiledSingularTestNode,
@@ -200,7 +189,6 @@ NonSourceCompiledNode = Union[
CompiledModelNode,
CompiledHookNode,
CompiledRPCNode,
CompiledSqlNode,
CompiledGenericTestNode,
CompiledSeedNode,
CompiledSnapshotNode,
@@ -212,7 +200,6 @@ NonSourceParsedNode = Union[
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -233,10 +220,8 @@ CompileResultNode = Union[
ParsedSourceDefinition,
]
# anything that participates in the graph: sources, exposures, metrics,
# or manifest nodes
# anything that participates in the graph: sources, exposures, manifest nodes
GraphMemberNode = Union[
CompileResultNode,
ParsedExposure,
ParsedMetric,
]

View File

@@ -15,8 +15,8 @@ from dbt.contracts.graph.compiled import (
)
from dbt.contracts.graph.parsed import (
ParsedMacro, ParsedDocumentation,
ParsedSourceDefinition, ParsedExposure, ParsedMetric,
HasUniqueID, UnpatchedSourceDefinition, ManifestNodes
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
@@ -29,8 +29,7 @@ from dbt.exceptions import (
raise_duplicate_resource_name, raise_compiler_error,
)
from dbt.helper_types import PathSet
from dbt.events.functions import fire_event
from dbt.events.types import MergedFromState
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import flags
@@ -547,8 +546,6 @@ class ParsingInfo:
@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash = field(default_factory=FileHash.empty)
project_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_hash: FileHash = field(default_factory=FileHash.empty)
project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)
@@ -565,7 +562,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros: MutableMapping[str, ParsedMacro] = field(default_factory=dict)
docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
metrics: MutableMapping[str, ParsedMetric] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
@@ -573,7 +569,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
state_check: ManifestStateCheck = field(default_factory=ManifestStateCheck)
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
env_vars: MutableMapping[str, str] = field(default_factory=dict)
_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
@@ -633,9 +628,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def update_exposure(self, new_exposure: ParsedExposure):
_update_into(self.exposures, new_exposure)
def update_metric(self, new_metric: ParsedMetric):
_update_into(self.metrics, new_metric)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
@@ -653,10 +645,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
k: v.to_dict(omit_none=False)
for k, v in self.exposures.items()
},
'metrics': {
k: v.to_dict(omit_none=False)
for k, v in self.metrics.items()
},
'nodes': {
k: v.to_dict(omit_none=False)
for k, v in self.nodes.items()
@@ -709,12 +697,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def get_resource_fqns(self) -> Mapping[str, PathSet]:
resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
all_resources = chain(
self.exposures.values(),
self.nodes.values(),
self.sources.values(),
self.metrics.values()
)
all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
if resource_type_plural not in resource_fqns:
@@ -742,7 +725,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros={k: _deepcopy(v) for k, v in self.macros.items()},
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
disabled={k: _deepcopy(v) for k, v in self.disabled.items()},
@@ -755,7 +737,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.nodes.values(),
self.sources.values(),
self.exposures.values(),
self.metrics.values(),
))
forward_edges, backward_edges = build_node_edges(edge_members)
self.child_map = forward_edges
@@ -777,7 +758,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros=self.macros,
docs=self.docs,
exposures=self.exposures,
metrics=self.metrics,
selectors=self.selectors,
metadata=self.metadata,
disabled=self.disabled,
@@ -797,8 +777,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return self.sources[unique_id]
elif unique_id in self.exposures:
return self.exposures[unique_id]
elif unique_id in self.metrics:
return self.metrics[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.InternalException(
@@ -959,7 +937,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
# log up to 5 items
sample = list(islice(merged, 5))
fire_event(MergedFromState(nbr_merged=len(merged), sample=sample))
logger.debug(
f'Merged {len(merged)} items from state (sample: {sample})'
)
# Methods that were formerly in ParseResult
@@ -1025,11 +1005,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)
def add_metric(self, source_file: SchemaSourceFile, metric: ParsedMetric):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
source_file.metrics.append(metric.unique_id)
def add_disabled_nofile(self, node: CompileResultNode):
# There can be multiple disabled nodes for the same unique_id
if node.unique_id in self.disabled:
@@ -1066,7 +1041,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.macros,
self.docs,
self.exposures,
self.metrics,
self.selectors,
self.files,
self.metadata,
@@ -1074,7 +1048,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.state_check,
self.source_patches,
self.disabled,
self.env_vars,
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
@@ -1097,7 +1070,7 @@ AnyManifest = Union[Manifest, MacroManifest]
@dataclass
@schema_version('manifest', 4)
@schema_version('manifest', 3)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=(
@@ -1124,11 +1097,6 @@ class WritableManifest(ArtifactMixin):
'The exposures defined in the dbt project and its dependencies'
))
)
metrics: Mapping[UniqueID, ParsedMetric] = field(
metadata=dict(description=(
'The metrics defined in the dbt project and its dependencies'
))
)
selectors: Mapping[UniqueID, Any] = field(
metadata=dict(description=(
'The selectors defined in selectors.yml'

View File

@@ -26,10 +26,11 @@ from dbt.contracts.graph.unparsed import (
UnparsedBaseNode, FreshnessThreshold, ExternalTable,
HasYamlMetadata, MacroArgument, UnparsedSourceDefinition,
UnparsedSourceTableDefinition, UnparsedColumn, TestDef,
ExposureOwner, ExposureType, MaturityType, MetricFilter
ExposureOwner, ExposureType, MaturityType
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.exceptions import warn_or_error
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt import flags
from dbt.node_types import NodeType
@@ -150,7 +151,7 @@ class ParsedNodeMixins(dbtClassMixin):
# Note: config should already be updated
self.patch_path: Optional[str] = patch.file_id
# update created_at so process_docs will run in partial parsing
self.created_at = time.time()
self.created_at = int(time.time())
self.description = patch.description
self.columns = patch.columns
self.meta = patch.meta
@@ -192,9 +193,8 @@ class ParsedNodeDefaults(ParsedNodeMandatory):
build_path: Optional[str] = None
deferred: bool = False
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
config_call_dict: Dict[str, Any] = field(default_factory=dict)
_event_status: Dict[str, Any] = field(default_factory=dict)
def write_node(self, target_path: str, subdirectory: str, payload: str):
if (os.path.basename(self.path) ==
@@ -224,8 +224,6 @@ class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):
def __post_serialize__(self, dct):
if 'config_call_dict' in dct:
del dct['config_call_dict']
if '_event_status' in dct:
del dct['_event_status']
return dct
@classmethod
@@ -242,8 +240,6 @@ class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):
return ParsedSeedNode.from_dict(dct)
elif resource_type == 'rpc':
return ParsedRPCNode.from_dict(dct)
elif resource_type == 'sql':
return ParsedSqlNode.from_dict(dct)
elif resource_type == 'test':
if 'test_metadata' in dct:
return ParsedGenericTestNode.from_dict(dct)
@@ -345,17 +341,11 @@ class ParsedModelNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class ParsedRPCNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class ParsedSqlNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
def same_seeds(first: ParsedNode, second: ParsedNode) -> bool:
# for seeds, we check the hashes. If the hashes are different types,
# no match. If the hashes are both the same 'path', log a warning and
@@ -412,9 +402,6 @@ class ParsedSeedNode(ParsedNode):
@dataclass
class TestMetadata(dbtClassMixin, Replaceable):
name: str
# kwargs are the args that are left in the test builder after
# removing configs. They are set from the test builder when
# the test node is created.
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None
@@ -431,17 +418,12 @@ class ParsedSingularTestNode(ParsedNode):
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
@property
def test_node_type(self):
return 'singular'
@dataclass
class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
# keep this in sync with CompiledGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
@@ -456,10 +438,6 @@ class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
True
)
@property
def test_node_type(self):
return 'generic'
@dataclass
class IntermediateSnapshotNode(ParsedNode):
@@ -515,12 +493,12 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
docs: Docs = field(default_factory=Docs)
patch_path: Optional[str] = None
arguments: List[MacroArgument] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
def patch(self, patch: ParsedMacroPatch):
self.patch_path: Optional[str] = patch.file_id
self.description = patch.description
self.created_at = time.time()
self.created_at = int(time.time())
self.meta = patch.meta
self.docs = patch.docs
self.arguments = patch.arguments
@@ -636,13 +614,7 @@ class ParsedSourceDefinition(
patch_path: Optional[Path] = None
unrendered_config: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
created_at: float = field(default_factory=lambda: time.time())
_event_status: Dict[str, Any] = field(default_factory=dict)
def __post_serialize__(self, dct):
if '_event_status' in dct:
del dct['_event_status']
return dct
created_at: int = field(default_factory=lambda: int(time.time()))
def same_database_representation(
self, other: 'ParsedSourceDefinition'
@@ -753,7 +725,7 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
@property
def depends_on_nodes(self):
@@ -799,88 +771,12 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
)
@dataclass
class ParsedMetric(UnparsedBaseNode, HasUniqueID, HasFqn):
model: str
name: str
description: str
label: str
type: str
sql: Optional[str]
timestamp: Optional[str]
filters: List[MetricFilter]
time_grains: List[str]
dimensions: List[str]
resource_type: NodeType = NodeType.Metric
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
return self.depends_on.nodes
@property
def search_name(self):
return self.name
def same_model(self, old: 'ParsedMetric') -> bool:
return self.model == old.model
def same_dimensions(self, old: 'ParsedMetric') -> bool:
return self.dimensions == old.dimensions
def same_filters(self, old: 'ParsedMetric') -> bool:
return self.filters == old.filters
def same_description(self, old: 'ParsedMetric') -> bool:
return self.description == old.description
def same_label(self, old: 'ParsedMetric') -> bool:
return self.label == old.label
def same_type(self, old: 'ParsedMetric') -> bool:
return self.type == old.type
def same_sql(self, old: 'ParsedMetric') -> bool:
return self.sql == old.sql
def same_timestamp(self, old: 'ParsedMetric') -> bool:
return self.timestamp == old.timestamp
def same_time_grains(self, old: 'ParsedMetric') -> bool:
return self.time_grains == old.time_grains
def same_contents(self, old: Optional['ParsedMetric']) -> bool:
# existing when it didn't before is a change!
# metadata/tags changes are not "changes"
if old is None:
return True
return (
self.same_model(old) and
self.same_dimensions(old) and
self.same_filters(old) and
self.same_description(old) and
self.same_label(old) and
self.same_type(old) and
self.same_sql(old) and
self.same_timestamp(old) and
self.same_time_grains(old) and
True
)
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedSingularTestNode,
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -892,6 +788,5 @@ ParsedResource = Union[
ParsedMacro,
ParsedNode,
ParsedExposure,
ParsedMetric,
ParsedSourceDefinition,
]

View File

@@ -60,7 +60,6 @@ class UnparsedNode(UnparsedBaseNode, HasSQL):
NodeType.Operation,
NodeType.Seed,
NodeType.RPCCall,
NodeType.SqlOperation,
]})
@property
@@ -168,25 +167,20 @@ class TimePeriod(StrEnum):
@dataclass
class Time(dbtClassMixin, Mergeable):
count: Optional[int] = None
period: Optional[TimePeriod] = None
class Time(dbtClassMixin, Replaceable):
count: int
period: TimePeriod
def exceeded(self, actual_age: float) -> bool:
if self.period is None or self.count is None:
return False
kwargs: Dict[str, int] = {self.period.plural(): self.count}
kwargs = {self.period.plural(): self.count}
difference = timedelta(**kwargs).total_seconds()
return actual_age > difference
def __bool__(self):
return self.count is not None and self.period is not None
@dataclass
class FreshnessThreshold(dbtClassMixin, Mergeable):
warn_after: Optional[Time] = field(default_factory=Time)
error_after: Optional[Time] = field(default_factory=Time)
warn_after: Optional[Time] = None
error_after: Optional[Time] = None
filter: Optional[str] = None
def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus":
@@ -199,7 +193,7 @@ class FreshnessThreshold(dbtClassMixin, Mergeable):
return FreshnessStatus.Pass
def __bool__(self):
return bool(self.warn_after) or bool(self.error_after)
return self.warn_after is not None or self.error_after is not None
@dataclass
@@ -285,7 +279,7 @@ class UnparsedSourceDefinition(dbtClassMixin, Replaceable):
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if 'freshness' not in dct and self.freshness is None:
if 'freshnewss' not in dct and self.freshness is None:
dct['freshness'] = None
return dct
@@ -446,27 +440,3 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
tags: List[str] = field(default_factory=list)
url: Optional[str] = None
depends_on: List[str] = field(default_factory=list)
@dataclass
class MetricFilter(dbtClassMixin, Replaceable):
field: str
operator: str
# TODO : Can we make this Any?
value: str
@dataclass
class UnparsedMetric(dbtClassMixin, Replaceable):
model: str
name: str
label: str
type: str
description: str = ''
sql: Optional[str] = None
timestamp: Optional[str] = None
time_grains: List[str] = field(default_factory=list)
dimensions: List[str] = field(default_factory=list)
filters: List[MetricFilter] = field(default_factory=list)
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)

View File

@@ -1,6 +1,7 @@
from dbt.contracts.util import Replaceable, Mergeable, list_str
from dbt.contracts.connection import QueryComment, UserConfigContract
from dbt.helper_types import NoValue
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError,
HyphenatedDbtClassMixin,
@@ -231,13 +232,12 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
printer_width: Optional[int] = None
write_json: Optional[bool] = None
warn_error: Optional[bool] = None
log_format: Optional[str] = None
log_format: Optional[bool] = None
debug: Optional[bool] = None
version_check: Optional[bool] = None
fail_fast: Optional[bool] = None
use_experimental_parser: Optional[bool] = None
static_parser: Optional[bool] = None
indirect_selection: Optional[str] = None
@dataclass

View File

@@ -11,11 +11,10 @@ from dbt.contracts.util import (
schema_version,
)
from dbt.exceptions import InternalException
from dbt.events.functions import fire_event
from dbt.events.types import TimingInfoCollected
from dbt.logger import (
TimingProcessor,
JsonOnly,
GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from dbt.dataclass_schema import dbtClassMixin, StrEnum
@@ -55,13 +54,7 @@ class collect_timing_info:
def __exit__(self, exc_type, exc_value, traceback):
self.timing_info.end()
with JsonOnly(), TimingProcessor(self.timing_info):
fire_event(TimingInfoCollected())
class RunningStatus(StrEnum):
Started = 'started'
Compiling = 'compiling'
Executing = 'executing'
logger.debug('finished collecting timing info')
class NodeStatus(StrEnum):
@@ -192,7 +185,7 @@ class RunExecutionResult(
@dataclass
@schema_version('run-results', 4)
@schema_version('run-results', 3)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@@ -376,7 +369,7 @@ class FreshnessResult(ExecutionResult):
@dataclass
@schema_version('sources', 3)
@schema_version('sources', 2)
class FreshnessExecutionResultArtifact(
ArtifactMixin,
VersionedSchema,

core/dbt/contracts/rpc.py (new file, 819 lines)
View File

@@ -0,0 +1,819 @@
import enum
import os
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional, Union, List, Any, Dict, Type, Sequence
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
CatalogArtifact,
CatalogResults,
ExecutionResult,
FreshnessExecutionResultArtifact,
FreshnessResult,
RunOperationResult,
RunOperationResultsArtifact,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.exceptions import InternalException
from dbt.logger import LogMessage
from dbt.utils import restrict_to
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Inputs
@dataclass
class RPCParameters(dbtClassMixin):
task_tags: TaskTags
timeout: Optional[float]
@classmethod
def __pre_deserialize__(cls, data, omit_none=True):
data = super().__pre_deserialize__(data)
if 'timeout' not in data:
data['timeout'] = None
if 'task_tags' not in data:
data['task_tags'] = None
return data
@dataclass
class RPCExecParameters(RPCParameters):
name: str
sql: str
macros: Optional[str] = None
@dataclass
class RPCCompileParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCListParameters(RPCParameters):
resource_types: Optional[List[str]] = None
models: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
selector: Optional[str] = None
output: Optional[str] = 'json'
output_keys: Optional[List[str]] = None
@dataclass
class RPCRunParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSnapshotParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCTestParameters(RPCCompileParameters):
data: bool = False
schema: bool = False
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSeedParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
show: bool = False
state: Optional[str] = None
@dataclass
class RPCDocsGenerateParameters(RPCParameters):
compile: bool = True
state: Optional[str] = None
@dataclass
class RPCBuildParameters(RPCParameters):
resource_types: Optional[List[str]] = None
select: Union[None, str, List[str]] = None
threads: Optional[int] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCCliParameters(RPCParameters):
cli: str
@dataclass
class RPCDepsParameters(RPCParameters):
pass
@dataclass
class KillParameters(RPCParameters):
task_id: TaskID
@dataclass
class PollParameters(RPCParameters):
request_token: TaskID
logs: bool = True
logs_start: int = 0
@dataclass
class PSParameters(RPCParameters):
active: bool = True
completed: bool = False
@dataclass
class StatusParameters(RPCParameters):
pass
@dataclass
class GCSettings(dbtClassMixin):
# start evicting the longest-ago-ended tasks here
maxsize: int
# start evicting all tasks before now - auto_reap_age when we have this
# many tasks in the table
reapsize: int
# a positive timedelta indicating how far back we should go
auto_reap_age: timedelta
@dataclass
class GCParameters(RPCParameters):
"""The gc endpoint takes three arguments, any of which may be present:
- task_ids: An optional list of task ID UUIDs to try to GC
- before: If provided, should be a datetime string. All tasks that finished
before that datetime will be GCed
- settings: If provided, should be a GCSettings object in JSON form. It
will be applied to the task manager before GC starts. By default the
existing gc settings remain.
"""
task_ids: Optional[List[TaskID]] = None
before: Optional[datetime] = None
settings: Optional[GCSettings] = None
@dataclass
class RPCRunOperationParameters(RPCParameters):
macro: str
args: Dict[str, Any] = field(default_factory=dict)
@dataclass
class RPCSourceFreshnessParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
@dataclass
class GetManifestParameters(RPCParameters):
pass
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
@schema_version('remote-list-results', 1)
class RemoteListResults(RemoteResult):
output: List[Any]
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-deps-result', 1)
class RemoteDepsResult(RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-catalog-result', 1)
class RemoteCatalogResults(CatalogResults, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
artifact = CatalogArtifact.from_results(
generated_at=self.generated_at,
nodes=self.nodes,
sources=self.sources,
compile_results=self._compile_results,
errors=self.errors,
)
artifact.write(path)
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-operation-result', 1)
class RemoteRunOperationResult(RunOperationResult, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_local_result(
cls,
base: RunOperationResultsArtifact,
logs: List[LogMessage],
) -> 'RemoteRunOperationResult':
return cls(
generated_at=base.metadata.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
success=base.success,
logs=logs,
)
def write(self, path: str):
writable = RunOperationResultsArtifact.from_success(
success=self.success,
generated_at=self.generated_at,
elapsed_time=self.elapsed_time,
)
writable.write(path)
@dataclass
@schema_version('remote-freshness-result', 1)
class RemoteFreshnessResult(FreshnessResult, RemoteResult):
@classmethod
def from_local_result(
cls,
base: FreshnessResult,
logs: List[LogMessage],
) -> 'RemoteFreshnessResult':
return cls(
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
)
def write(self, path: str):
writable = FreshnessExecutionResultArtifact.from_result(base=self)
writable.write(path)
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)
RPCResult = Union[
RemoteCompileResult,
RemoteExecutionResult,
RemoteFreshnessResult,
RemoteCatalogResults,
RemoteDepsResult,
RemoteRunOperationResult,
]
# GC types
class GCResultState(StrEnum):
Deleted = 'deleted' # successful GC
Missing = 'missing' # nothing to GC
Running = 'running' # can't GC
@dataclass
@schema_version('remote-gc-result', 1)
class GCResult(RemoteResult):
logs: List[LogMessage] = field(default_factory=list)
deleted: List[TaskID] = field(default_factory=list)
missing: List[TaskID] = field(default_factory=list)
running: List[TaskID] = field(default_factory=list)
def add_result(self, task_id: TaskID, state: GCResultState):
if state == GCResultState.Missing:
self.missing.append(task_id)
elif state == GCResultState.Running:
self.running.append(task_id)
elif state == GCResultState.Deleted:
self.deleted.append(task_id)
else:
raise InternalException(
f'Got invalid state in add_result: {state}'
)
# Task management types
class TaskHandlerState(StrEnum):
NotStarted = 'not started'
Initializing = 'initializing'
Running = 'running'
Success = 'success'
Error = 'error'
Killed = 'killed'
Failed = 'failed'
def __lt__(self, other) -> bool:
"""A logical ordering for TaskHandlerState:
NotStarted < Initializing < Running < (Success, Error, Killed, Failed)
"""
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other not in smaller
return False
def __le__(self, other) -> bool:
# so that ((Success <= Error) is True)
return ((self < other) or
(self == other) or
(self.finished and other.finished))
def __gt__(self, other) -> bool:
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other in smaller
return other in smaller
def __ge__(self, other) -> bool:
# so that ((Success <= Error) is True)
return ((self > other) or
(self == other) or
(self.finished and other.finished))
@property
def finished(self) -> bool:
return self in (self.Error, self.Success, self.Killed, self.Failed)
@dataclass
class TaskTiming(dbtClassMixin):
state: TaskHandlerState
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
class TaskRow(TaskTiming):
task_id: TaskID
request_source: str
method: str
request_id: Union[str, int]
tags: TaskTags = None
timeout: Optional[float] = None
@dataclass
@schema_version('remote-ps-result', 1)
class PSResult(RemoteResult):
rows: List[TaskRow]
class KillResultStatus(StrEnum):
Missing = 'missing'
NotStarted = 'not_started'
Killed = 'killed'
Finished = 'finished'
@dataclass
@schema_version('remote-kill-result', 1)
class KillResult(RemoteResult):
state: KillResultStatus = KillResultStatus.Missing
logs: List[LogMessage] = field(default_factory=list)
@dataclass
@schema_version('remote-manifest-result', 1)
class GetManifestResult(RemoteResult):
manifest: Optional[WritableManifest] = None
# this is kind of carefully structured: BlocksManifestTasks is implied by
# RequiresConfigReloadBefore and RequiresManifestReloadAfter
class RemoteMethodFlags(enum.Flag):
Empty = 0
BlocksManifestTasks = 1
RequiresConfigReloadBefore = 3
RequiresManifestReloadAfter = 5
Builtin = 8
# Polling types
@dataclass
class PollResult(RemoteResult, TaskTiming):
state: TaskHandlerState
tags: TaskTags
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
@schema_version('poll-remote-deps-result', 1)
class PollRemoteEmptyCompleteResult(PollResult, RemoteResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_result(
cls: Type['PollRemoteEmptyCompleteResult'],
base: RemoteDepsResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRemoteEmptyCompleteResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-killed-result', 1)
class PollKilledResult(PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Killed),
)
@dataclass
@schema_version('poll-remote-execution-result', 1)
class PollExecuteCompleteResult(
RemoteExecutionResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollExecuteCompleteResult'],
base: RemoteExecutionResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollExecuteCompleteResult':
return cls(
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at,
)
@dataclass
@schema_version('poll-remote-compile-result', 1)
class PollCompileCompleteResult(
RemoteCompileResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCompileCompleteResult'],
base: RemoteCompileResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCompileCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-result', 1)
class PollRunCompleteResult(
RemoteRunResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunCompleteResult'],
base: RemoteRunResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
table=base.table,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-operation-result', 1)
class PollRunOperationCompleteResult(
RemoteRunOperationResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunOperationCompleteResult'],
base: RemoteRunOperationResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunOperationCompleteResult':
return cls(
success=base.success,
results=base.results,
generated_at=base.generated_at,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-catalog-result', 1)
class PollCatalogCompleteResult(RemoteCatalogResults, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCatalogCompleteResult'],
base: RemoteCatalogResults,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCatalogCompleteResult':
return cls(
nodes=base.nodes,
sources=base.sources,
generated_at=base.generated_at,
errors=base.errors,
_compile_results=base._compile_results,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-in-progress-result', 1)
class PollInProgressResult(PollResult):
pass
@dataclass
@schema_version('poll-remote-get-manifest-result', 1)
class PollGetManifestResult(GetManifestResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollGetManifestResult'],
base: GetManifestResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollGetManifestResult':
return cls(
manifest=base.manifest,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-freshness-result', 1)
class PollFreshnessResult(RemoteFreshnessResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollFreshnessResult'],
base: RemoteFreshnessResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollFreshnessResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
)
# Manifest parsing types
class ManifestStatus(StrEnum):
Init = 'init'
Compiling = 'compiling'
Ready = 'ready'
Error = 'error'
@dataclass
@schema_version('remote-status-result', 1)
class LastParse(RemoteResult):
state: ManifestStatus = ManifestStatus.Init
logs: List[LogMessage] = field(default_factory=list)
error: Optional[Dict[str, Any]] = None
timestamp: datetime = field(default_factory=datetime.utcnow)
pid: int = field(default_factory=os.getpid)
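A minimal sketch of constructing the gc parameters described by the `GCParameters` docstring above (all values are illustrative):
```
from datetime import timedelta

# task_tags and timeout come from RPCParameters and have no defaults,
# so they are passed explicitly
params = GCParameters(
    task_tags=None,
    timeout=None,
    task_ids=None,
    before=None,
    settings=GCSettings(maxsize=1000, reapsize=500, auto_reap_age=timedelta(days=2)),
)
```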

View File

@@ -1,88 +0,0 @@
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, List, Any, Dict, Sequence
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
ExecutionResult,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.logger import LogMessage
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)

View File

@@ -11,7 +11,7 @@ from dbt.exceptions import (
RuntimeException,
)
from dbt.version import __version__
from dbt.events.functions import get_invocation_id
from dbt.tracking import get_invocation_id
from dbt.dataclass_schema import dbtClassMixin
SourceKey = Tuple[str, str]

View File

@@ -22,7 +22,7 @@ class DateTimeSerialization(SerializationStrategy):
out = value.isoformat()
# Assume UTC if timezone is missing
if value.tzinfo is None:
out += "Z"
out = out + "Z"
return out
def deserialize(self, value):

View File

@@ -36,9 +36,9 @@ class DBTDeprecation:
if self.name not in active_deprecations:
desc = self.description.format(**kwargs)
msg = ui.line_wrap_message(
desc, prefix='Deprecated functionality\n\n'
desc, prefix='* Deprecation Warning: '
)
dbt.exceptions.warn_or_error(msg, log_fmt=ui.warning_tag('{}'))
dbt.exceptions.warn_or_error(msg)
self.track_deprecation_warn()
active_deprecations.add(self.name)
@@ -61,20 +61,13 @@ class PackageInstallPathDeprecation(DBTDeprecation):
class ConfigPathDeprecation(DBTDeprecation):
_name = 'project_config_path'
_description = '''\
The `{deprecated_path}` config has been renamed to `{exp_path}`.
The `{deprecated_path}` config has been deprecated in favor of `{exp_path}`.
Please update your `dbt_project.yml` configuration to reflect this change.
'''
class ConfigSourcePathDeprecation(ConfigPathDeprecation):
_name = 'project-config-source-paths'
class ConfigDataPathDeprecation(ConfigPathDeprecation):
_name = 'project-config-data-paths'
_adapter_renamed_description = """\
The adapter function `adapter.{old_name}` is deprecated and will be removed in
a future release of dbt. Please use `adapter.{new_name}` instead.
@@ -113,8 +106,7 @@ def warn(name, *args, **kwargs):
active_deprecations: Set[str] = set()
deprecations_list: List[DBTDeprecation] = [
ConfigSourcePathDeprecation(),
ConfigDataPathDeprecation(),
ConfigPathDeprecation(),
PackageInstallPathDeprecation(),
PackageRedirectDeprecation()
]

View File

@@ -6,8 +6,7 @@ from typing import List, Optional, Generic, TypeVar
from dbt.clients import system
from dbt.contracts.project import ProjectPackageMetadata
from dbt.events.functions import fire_event
from dbt.events.types import DepsSetDownloadDirectory
from dbt.logger import GLOBAL_LOGGER as logger
DOWNLOADS_PATH = None
@@ -32,7 +31,7 @@ def downloads_directory():
remove_downloads = True
system.make_directory(DOWNLOADS_PATH)
fire_event(DepsSetDownloadDirectory(path=DOWNLOADS_PATH))
logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH))
yield DOWNLOADS_PATH

View File

@@ -12,8 +12,7 @@ from dbt.deps.base import PinnedPackage, UnpinnedPackage, get_downloads_path
from dbt.exceptions import (
ExecutableError, warn_or_error, raise_dependency_error
)
from dbt.events.functions import fire_event
from dbt.events.types import EnsureGitInstalled
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import ui
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
@@ -82,7 +81,11 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
)
except ExecutableError as exc:
if exc.cmd and exc.cmd[0] == 'git':
fire_event(EnsureGitInstalled())
logger.error(
'Make sure git is installed on your machine. More '
'information: '
'https://docs.getdbt.com/docs/package-management'
)
raise
return os.path.join(get_downloads_path(), dir_)

View File

@@ -6,8 +6,7 @@ from dbt.contracts.project import (
ProjectPackageMetadata,
LocalPackage,
)
from dbt.events.functions import fire_event
from dbt.events.types import DepsCreatingLocalSymlink, DepsSymlinkNotAvailable
from dbt.logger import GLOBAL_LOGGER as logger
class LocalPackageMixin:
@@ -58,11 +57,12 @@ class LocalPinnedPackage(LocalPackageMixin, PinnedPackage):
system.remove_file(dest_path)
if can_create_symlink:
fire_event(DepsCreatingLocalSymlink())
logger.debug(' Creating symlink to local dependency.')
system.make_symlink(src_path, dest_path)
else:
fire_event(DepsSymlinkNotAvailable())
logger.debug(' Symlinks are not available on this '
'OS, copying dependency.')
shutil.copytree(src_path, dest_path)

View File

@@ -127,12 +127,9 @@ class RegistryUnpinnedPackage(
raise DependencyException(new_msg) from e
available = registry.get_available_versions(self.package)
prerelease_version_specified = any(
bool(version.prerelease) for version in self.versions
)
installable = semver.filter_installable(
available,
self.install_prerelease or prerelease_version_specified
self.install_prerelease
)
available_latest = installable[-1]

View File

@@ -3,6 +3,7 @@ from typing import Dict, List, NoReturn, Union, Type, Iterator, Set
from dbt.exceptions import raise_dependency_error, InternalException
from dbt.context.target import generate_target_context
from dbt.config import Project, RuntimeConfig
from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.deps.base import BasePackage, PinnedPackage, UnpinnedPackage
@@ -125,7 +126,8 @@ def resolve_packages(
pending = PackageListing.from_contracts(packages)
final = PackageListing()
renderer = DbtProjectYamlRenderer(config, config.cli_vars)
ctx = generate_target_context(config, config.cli_vars)
renderer = DbtProjectYamlRenderer(ctx)
while pending:
next_pending = PackageListing()

View File

@@ -6,53 +6,7 @@ The Events module is the implementation for structured logging. These events repr
The event module provides types that represent what is happening in dbt in `events.types`. These types are intended to represent an exhaustive list of all things happening within dbt that will need to be logged, streamed, or printed. To fire an event, `events.functions::fire_event` is the entry point to the module from everywhere in dbt.
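A minimal sketch of firing an event from elsewhere in dbt (this uses the example event type defined in the next section; the `unique_id` value is made up):
```
from dbt.events.functions import fire_event
from dbt.events.types import PartialParsingDeletedExposure

# fire_event is the single entry point: it handles levels, timestamps, and
# routing to the cli and file destinations the event type opts into
fire_event(PartialParsingDeletedExposure(unique_id="exposure.my_project.weekly_kpis"))
```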
# Adding a New Event
In `events.types`, add a new class that represents the new event. Every event must be a dataclass with, at minimum, a code. You may also include other values needed to construct downstream messaging, but only include the data necessary to build this message. You must extend all destinations (e.g., if your log message belongs on the cli, extend `Cli`) as well as the log level this event belongs to. This system is designed to take full advantage of mypy, so running it will catch anything you may have missed.
## Required for Every Event
- a string attribute `code` that is unique across events
- a log level, assigned by extending `DebugLevel`, `InfoLevel`, `WarnLevel`, or `ErrorLevel`
- a `message()` method
- an extension of `File` and/or `Cli`, depending on where the event should be output
Example
```
@dataclass
class PartialParsingDeletedExposure(DebugLevel, Cli, File):
    unique_id: str
    code: str = "I049"

    def message(self) -> str:
        return f"Partial parsing: deleted exposure {self.unique_id}"
```
## Optional (based on your event)
- Events associated with node status changes must have `report_node_data` passed in and must extend `NodeInfo`
- define `asdict` if your data is not serializable to JSON
Example
```
@dataclass
class SuperImportantNodeEvent(InfoLevel, File, NodeInfo):
    node_name: str
    run_result: RunResult
    report_node_data: ParsedModelNode  # may vary
    code: str = "Q036"

    def message(self) -> str:
        return f"{self.node_name} had overly verbose result of {self.run_result}"

    @classmethod
    def asdict(cls, data: list) -> dict:
        return dict((k, str(v)) for k, v in data)
```
All values other than `code` and `report_node_data` will be included in the `data` node of the JSON log output.
Once your event has been added, add a dummy call to your new event at the bottom of `types.py`, and add your new event to the `sample_values` list in `test/unit/test_events.py`.
In `events.types` add a new class that represents the new event. This may be a simple class with no values, or it may be a dataclass with some values to construct downstream messaging. Only include the data necessary to construct this message within this class. You must extend all destinations (e.g. - if your log message belongs on the cli, extend `CliEventABC`) as well as the loglevel this event belongs to.
# Adapter Maintainers
To integrate existing log messages from adapters, you likely have a line of code like this in your adapter already:
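In most adapters that line is the old global logger import, for example:
```
from dbt.logger import GLOBAL_LOGGER as logger
```
To route those call sites through the new event system, swap it for the `AdapterLogger` wrapper shown later in this diff, constructed with your adapter's name (the import path below is an assumption; use wherever `AdapterLogger` is exposed in your dbt version):
```
from dbt.events import AdapterLogger  # import path assumed

logger = AdapterLogger("postgres")
```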

View File

@@ -3,63 +3,84 @@ from dbt.events.functions import fire_event
from dbt.events.types import (
AdapterEventDebug, AdapterEventInfo, AdapterEventWarning, AdapterEventError
)
from typing import Any
@dataclass
class AdapterLogger():
name: str
def debug(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventDebug(name=self.name, base_msg=msg, args=args)
def debug(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventDebug(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def info(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventInfo(name=self.name, base_msg=msg, args=args)
def info(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventInfo(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def warning(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventWarning(name=self.name, base_msg=msg, args=args)
def warning(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventWarning(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def error(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
def error(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
# The default exc_info=True is what makes this method different
def exception(self, msg, *args, exc_info=True, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
def exception(
self,
msg: str,
exc_info: Any = True, # this default is what makes this method different
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
fire_event(event)
def critical(self, msg, *args, exc_info=False, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
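Both versions of the wrapper above expose the same stdlib-style methods, so adapter call sites read like ordinary logger calls. A minimal usage sketch (the adapter name and messages are illustrative):
```
logger = AdapterLogger("postgres")
logger.debug("opened a new connection")  # wrapped in an AdapterEventDebug and passed to fire_event
logger.exception("query failed")         # like error(), but exc_info defaults to True
```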

View File

@@ -1,173 +0,0 @@
from abc import ABCMeta, abstractmethod, abstractproperty
from dataclasses import dataclass
from datetime import datetime
import os
import threading
from typing import Any, Optional
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# These base types define the _required structure_ for the concrete event #
# types defined in types.py #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# in preparation for #3977
class TestLevel():
def level_tag(self) -> str:
return "test"
class DebugLevel():
def level_tag(self) -> str:
return "debug"
class InfoLevel():
def level_tag(self) -> str:
return "info"
class WarnLevel():
def level_tag(self) -> str:
return "warn"
class ErrorLevel():
def level_tag(self) -> str:
return "error"
class Cache():
# Events with this class will only be logged when the `--log-cache-events` flag is passed
pass
@dataclass
class Node():
node_path: str
node_name: str
unique_id: str
resource_type: str
materialized: str
node_status: str
node_started_at: datetime
node_finished_at: Optional[datetime]
type: str = 'node_status'
@dataclass
class ShowException():
# N.B.:
# As long as we stick with the current convention of setting the member vars in the
# `message` method of subclasses, this is a safe operation.
# If that ever changes we'll want to reassess.
def __post_init__(self):
self.exc_info: Any = True
self.stack_info: Any = None
self.extra: Any = None
# TODO add exhaustiveness checking for subclasses
# can't use ABCs with @dataclass because of https://github.com/python/mypy/issues/5374
# top-level superclass for all events
class Event(metaclass=ABCMeta):
# fields that should be on all events with their default implementations
log_version: int = 1
ts: Optional[datetime] = None # use getter for non-optional
ts_rfc3339: Optional[str] = None # use getter for non-optional
pid: Optional[int] = None # use getter for non-optional
node_info: Optional[Node]
# four digit string code that uniquely identifies this type of event
# uniqueness and valid characters are enforced by tests
@abstractproperty
@staticmethod
def code() -> str:
raise Exception("code() not implemented for event")
# do not define this yourself. inherit it from one of the above level types.
@abstractmethod
def level_tag(self) -> str:
raise Exception("level_tag not implemented for Event")
# Solely the human readable message. Timestamps and formatting will be added by the logger.
# Must override yourself
@abstractmethod
def message(self) -> str:
raise Exception("msg not implemented for Event")
# exactly one time stamp per concrete event
def get_ts(self) -> datetime:
if not self.ts:
self.ts = datetime.utcnow()
self.ts_rfc3339 = self.ts.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
return self.ts
# preformatted time stamp
def get_ts_rfc3339(self) -> str:
if not self.ts_rfc3339:
# get_ts() creates the formatted string too so all time logic is centralized
self.get_ts()
return self.ts_rfc3339 # type: ignore
# exactly one pid per concrete event
def get_pid(self) -> int:
if not self.pid:
self.pid = os.getpid()
return self.pid
# in theory threads can change so we don't cache them.
def get_thread_name(self) -> str:
return threading.current_thread().getName()
@classmethod
def get_invocation_id(cls) -> str:
from dbt.events.functions import get_invocation_id
return get_invocation_id()
# default dict factory for all events. can override on concrete classes.
@classmethod
def asdict(cls, data: list) -> dict:
d = dict()
for k, v in data:
# stringify all exceptions
if isinstance(v, Exception) or isinstance(v, BaseException):
d[k] = str(v)
# skip all binary data
elif isinstance(v, bytes):
continue
else:
d[k] = v
return d
@dataclass # type: ignore
class NodeInfo(Event, metaclass=ABCMeta):
report_node_data: Any # Union[ParsedModelNode, ...] TODO: resolve circular imports
def get_node_info(self):
node_info = Node(
node_path=self.report_node_data.path,
node_name=self.report_node_data.name,
unique_id=self.report_node_data.unique_id,
resource_type=self.report_node_data.resource_type.value,
materialized=self.report_node_data.config.get('materialized'),
node_status=str(self.report_node_data._event_status.get('node_status')),
node_started_at=self.report_node_data._event_status.get("started_at"),
node_finished_at=self.report_node_data._event_status.get("finished_at")
)
return node_info
class File(Event, metaclass=ABCMeta):
# Solely the human readable message. Timestamps and formatting will be added by the logger.
def file_msg(self) -> str:
# returns the event msg unless overridden in the concrete class
return self.message()
class Cli(Event, metaclass=ABCMeta):
# Solely the human readable message. Timestamps and formatting will be added by the logger.
def cli_msg(self) -> str:
# returns the event msg unless overridden in the concrete class
return self.message()
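As a point of reference, here is a minimal sketch of a concrete event built against the interface above. The class name, code value, and message are hypothetical; in the real codebase, level mixins such as InfoLevel/DebugLevel (defined alongside these base types) supply level_tag() so concrete events usually only implement message().

```python
from dataclasses import dataclass
from dbt.events.base_types import Cli  # module path taken from the imports later in this diff

@dataclass
class ExampleCliEvent(Cli):
    # hypothetical event, shown only to illustrate the abstract interface
    msg: str

    @staticmethod
    def code() -> str:
        return "Z999"  # placeholder; real codes are unique four-character strings enforced by tests

    def level_tag(self) -> str:
        return "info"

    def message(self) -> str:
        return f"Example: {self.msg}"
```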

View File

@@ -1,49 +0,0 @@
from dbt import ui
from dbt.node_types import NodeType
from typing import Optional, Union
def format_fancy_output_line(
msg: str, status: str, index: Optional[int],
total: Optional[int], execution_time: Optional[float] = None,
truncate: bool = False
) -> str:
if index is None or total is None:
progress = ''
else:
progress = '{} of {} '.format(index, total)
prefix = "{progress}{message}".format(
progress=progress,
message=msg)
truncate_width = ui.printer_width() - 3
justified = prefix.ljust(ui.printer_width(), ".")
if truncate and len(justified) > truncate_width:
justified = justified[:truncate_width] + '...'
if execution_time is None:
status_time = ""
else:
status_time = " in {execution_time:0.2f}s".format(
execution_time=execution_time)
output = "{justified} [{status}{status_time}]".format(
justified=justified, status=status, status_time=status_time)
return output
def _pluralize(string: Union[str, NodeType]) -> str:
try:
convert = NodeType(string)
except ValueError:
return f'{string}s'
else:
return convert.pluralize()
def pluralize(count, string: Union[str, NodeType]):
pluralized: str = str(string)
if count != 1:
pluralized = _pluralize(string)
return f'{count} {pluralized}'
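For orientation, roughly what these helpers produce (a hedged sketch: the exact dot padding depends on ui.printer_width(), and the helpers are assumed to be imported from the module above):

```python
# hypothetical call with a printer width of 80
line = format_fancy_output_line(
    msg="OK created view model my_schema.my_model",
    status="SUCCESS",
    index=1,
    total=3,
    execution_time=0.12,
)
# -> "1 of 3 OK created view model my_schema.my_model....... [SUCCESS in 0.12s]"
#    (dots pad the prefix out to the configured printer width)

pluralize(1, "model")  # -> "1 model"
pluralize(3, "model")  # -> "3 models"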

View File

@@ -1,309 +1,162 @@
from colorama import Style
from datetime import datetime
import dbt.events.functions as this # don't worry I hate it too.
from dbt.events.base_types import Cli, Event, File, ShowException, NodeInfo, Cache
from dbt.events.types import EventBufferFull, T_Event, MainReportVersion, EmptyLine
from dbt.events.history import EVENT_HISTORY
from dbt.events.types import CliEventABC, Event, ShowException
import dbt.logger as logger # TODO remove references to this logger
import dbt.flags as flags
# TODO this will need to move eventually
from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing, GLOBAL_LOGGER
import json
import io
from io import StringIO, TextIOWrapper
import logbook
import logging.config
import logging
from logging import Logger
import sys
from logging.handlers import RotatingFileHandler
import os
import uuid
import threading
from typing import Any, Callable, Dict, List, Optional, Union
import dataclasses
from collections import deque
import structlog
import sys
# create the global event history buffer with the default max size (10k)
# python 3.7 doesn't support type hints on globals, but mypy requires them. hence the ignore.
# TODO the flags module has not yet been resolved when this is created
global EVENT_HISTORY
EVENT_HISTORY = deque(maxlen=flags.EVENT_BUFFER_SIZE) # type: ignore
# these two loggers will be set up with CLI inputs via setup_event_logger
# DO NOT IMPORT AND USE THESE DIRECTLY
# create the global file logger with no configuration
global FILE_LOG
FILE_LOG = logging.getLogger('default_file')
null_handler = logging.NullHandler()
FILE_LOG.addHandler(null_handler)
global STDOUT_LOGGER
STDOUT_LOGGER = structlog.get_logger()
# set up logger to go to stdout with defaults
# setup_event_logger will be called once args have been parsed
global STDOUT_LOG
STDOUT_LOG = logging.getLogger('default_stdout')
STDOUT_LOG.setLevel(logging.INFO)
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setLevel(logging.INFO)
STDOUT_LOG.addHandler(stdout_handler)
format_color = True
format_json = False
invocation_id: Optional[str] = None
global FILE_LOGGER
FILE_LOGGER = structlog.get_logger()
def setup_event_logger(log_path, level_override=None):
# flags have been resolved, and log_path is known
global EVENT_HISTORY
EVENT_HISTORY = deque(maxlen=flags.EVENT_BUFFER_SIZE) # type: ignore
make_log_dir_if_missing(log_path)
this.format_json = flags.LOG_FORMAT == 'json'
def setup_event_logger(log_path):
logger.make_log_dir_if_missing(log_path)
json: bool = flags.LOG_FORMAT == 'json'
# USE_COLORS can be None if the app just started and the cli flags
# haven't been applied yet
this.format_color = True if flags.USE_COLORS else False
colors: bool = True if flags.USE_COLORS else False
# TODO this default should live somewhere better
log_dest = os.path.join(log_path, 'dbt.log')
level = level_override or (logging.DEBUG if flags.DEBUG else logging.INFO)
log_dest = os.path.join(logger.LOG_DIR, 'dbt.log')
# overwrite the STDOUT_LOG logger with the configured one
this.STDOUT_LOG = logging.getLogger('configured_std_out')
this.STDOUT_LOG.setLevel(level)
# see: https://docs.python.org/3/library/logging.config.html#logging-config-dictschema
# logging.config.dictConfig({
# "version": 1,
# "disable_existing_loggers": False,
# "formatters": {
# "plain": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.dev.ConsoleRenderer(colors=False),
# "foreign_pre_chain": pre_chain,
# },
# "colored": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.dev.ConsoleRenderer(colors=True),
# "foreign_pre_chain": pre_chain,
# },
# "json": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.processors.JSONRenderer(),
# "foreign_pre_chain": pre_chain,
# },
# },
# "handlers": {
# "console": {
# "level": "DEBUG",
# "class": "logging.StreamHandler",
# "formatter": "colored",
# },
# "file": {
# "level": "DEBUG",
# "class": "logging.handlers.WatchedFileHandler",
# # TODO this default should live somewhere better
# "filename": os.path.join(logger.LOG_DIR, 'dbt.log'),
# "formatter": "plain",
# },
# "json-console": {
# "level": "DEBUG",
# "class": "logging.StreamHandler",
# "formatter": "json",
# },
# "json-file": {
# "level": "DEBUG",
# "class": "logging.handlers.WatchedFileHandler",
# # TODO this default should live somewhere better
# "filename": os.path.join(logger.LOG_DIR, 'dbt.log.json'),
# "formatter": "json",
# },
# },
# "loggers": {
# "": {
# "handlers": ["json-console", "json-file"] if json else ["console", "file"],
# "level": "DEBUG" if flags.DEBUG else "INFO",
# "propagate": True,
# },
# }
# })
FORMAT = "%(message)s"
stdout_passthrough_formatter = logging.Formatter(fmt=FORMAT)
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(stdout_passthrough_formatter)
stdout_handler.setLevel(level)
# clear existing stdout TextIOWrapper stream handlers
this.STDOUT_LOG.handlers = [
h for h in this.STDOUT_LOG.handlers
if not (hasattr(h, 'stream') and isinstance(h.stream, TextIOWrapper)) # type: ignore
]
this.STDOUT_LOG.addHandler(stdout_handler)
# overwrite the FILE_LOG logger with the configured one
this.FILE_LOG = logging.getLogger('configured_file')
this.FILE_LOG.setLevel(logging.DEBUG) # always debug regardless of user input
file_passthrough_formatter = logging.Formatter(fmt=FORMAT)
file_handler = RotatingFileHandler(
filename=log_dest,
encoding='utf8',
maxBytes=10 * 1024 * 1024, # 10 mb
backupCount=5
# set-up global logging configurations
structlog.configure(
wrapper_class=structlog.stdlib.BoundLogger,
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=False,
)
file_handler.setFormatter(file_passthrough_formatter)
file_handler.setLevel(logging.DEBUG) # always debug regardless of user input
this.FILE_LOG.handlers.clear()
this.FILE_LOG.addHandler(file_handler)
# configure the stdout logger
STDOUT_LOGGER = structlog.wrap_logger(
logger=logging.Logger('console logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper("%H:%M:%S"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
]
)
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.dev.ConsoleRenderer(colors=colors),
)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
STDOUT_LOGGER.addHandler(handler)
# used for integration tests
def capture_stdout_logs() -> StringIO:
capture_buf = io.StringIO()
stdout_capture_handler = logging.StreamHandler(capture_buf)
stdout_handler.setLevel(logging.DEBUG)
this.STDOUT_LOG.addHandler(stdout_capture_handler)
return capture_buf
# used for integration tests
def stop_capture_stdout_logs() -> None:
this.STDOUT_LOG.handlers = [
h for h in this.STDOUT_LOG.handlers
if not (hasattr(h, 'stream') and isinstance(h.stream, StringIO)) # type: ignore
]
def env_secrets() -> List[str]:
return [
v for k, v in os.environ.items()
if k.startswith(SECRET_ENV_PREFIX)
]
def scrub_secrets(msg: str, secrets: List[str]) -> str:
scrubbed = msg
for secret in secrets:
scrubbed = scrubbed.replace(secret, "*****")
return scrubbed
# returns a dictionary representation of the event fields. You must specify which of the
# available messages you would like to use (i.e. - e.message(), e.cli_msg(), e.file_msg()).
# Used for constructing json formatted events. Includes secrets which must be scrubbed at
# the usage site.
def event_to_serializable_dict(
e: T_Event, ts_fn: Callable[[datetime], str],
msg_fn: Callable[[T_Event], str]
) -> Dict[str, Any]:
data = dict()
node_info = dict()
log_line = dict()
try:
log_line = dataclasses.asdict(e, dict_factory=type(e).asdict)
except AttributeError:
event_type = type(e).__name__
raise Exception( # TODO this may hang async threads
f"type {event_type} is not serializable to json."
f" First make sure that the call sites for {event_type} match the type hints"
f" and if they do, you can override the dataclass method `asdict` in {event_type} in"
" types.py to define your own serialization function to a dictionary of valid json"
" types"
# configure the json file handler
if json:
FILE_LOGGER = structlog.wrap_logger(
logger=logging.Logger('json file logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
]
)
if isinstance(e, NodeInfo):
node_info = dataclasses.asdict(e.get_node_info())
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.processors.JSONRenderer(),
)
handler = logging.handlers.WatchedFileHandler(filename=log_dest)
handler.setFormatter(formatter)
FILE_LOGGER.addHandler(handler)
for field, value in log_line.items(): # type: ignore[attr-defined]
if field not in ["code", "report_node_data"]:
data[field] = value
event_dict = {
'type': 'log_line',
'log_version': e.log_version,
'ts': ts_fn(e.get_ts()),
'pid': e.get_pid(),
'msg': msg_fn(e),
'level': e.level_tag(),
'data': data,
'invocation_id': e.get_invocation_id(),
'thread_name': e.get_thread_name(),
'node_info': node_info,
'code': e.code
}
return event_dict
# translates an Event to a completely formatted text-based log line
# you have to specify which message you want. (i.e. - e.message, e.cli_msg(), e.file_msg())
# type hinting everything as strings so we don't get any unintentional string conversions via str()
def create_info_text_log_line(e: T_Event, msg_fn: Callable[[T_Event], str]) -> str:
color_tag: str = '' if this.format_color else Style.RESET_ALL
ts: str = e.get_ts().strftime("%H:%M:%S")
scrubbed_msg: str = scrub_secrets(msg_fn(e), env_secrets())
log_line: str = f"{color_tag}{ts} {scrubbed_msg}"
return log_line
def create_debug_text_log_line(e: T_Event, msg_fn: Callable[[T_Event], str]) -> str:
log_line: str = ''
# Create a separator if this is the beginning of an invocation
if type(e) == MainReportVersion:
separator = 30 * '='
log_line = f'\n\n{separator} {e.get_ts()} | {get_invocation_id()} {separator}\n'
color_tag: str = '' if this.format_color else Style.RESET_ALL
ts: str = e.get_ts().strftime("%H:%M:%S.%f")
scrubbed_msg: str = scrub_secrets(msg_fn(e), env_secrets())
level: str = e.level_tag() if len(e.level_tag()) == 5 else f"{e.level_tag()} "
thread = ''
if threading.current_thread().getName():
thread_name = threading.current_thread().getName()
thread_name = thread_name[:10]
thread_name = thread_name.ljust(10, ' ')
thread = f' [{thread_name}]:'
log_line = log_line + f"{color_tag}{ts} [{level}]{thread} {scrubbed_msg}"
return log_line
# translates an Event to a completely formatted json log line
# you have to specify which message you want. (i.e. - e.message(), e.cli_msg(), e.file_msg())
def create_json_log_line(e: T_Event, msg_fn: Callable[[T_Event], str]) -> Optional[str]:
if type(e) == EmptyLine:
return None # will not be sent to logger
# using preformatted string instead of formatting it here to be extra careful about timezone
values = event_to_serializable_dict(e, lambda _: e.get_ts_rfc3339(), lambda x: msg_fn(x))
raw_log_line = json.dumps(values, sort_keys=True)
return scrub_secrets(raw_log_line, env_secrets())
# calls create_info_text_log_line(), create_debug_text_log_line(), or create_json_log_line() according to logger config
def create_log_line(
e: T_Event,
msg_fn: Callable[[T_Event], str],
file_output=False
) -> Optional[str]:
if this.format_json:
return create_json_log_line(e, msg_fn) # json output, both console and file
elif file_output is True or flags.DEBUG:
return create_debug_text_log_line(e, msg_fn) # default file output
# configure the plaintext file handler
else:
return create_info_text_log_line(e, msg_fn) # console output
# allows for reuse of this obnoxious if else tree.
# do not use for exceptions; it doesn't pass along exc_info, stack_info, or extra
def send_to_logger(l: Union[Logger, logbook.Logger], level_tag: str, log_line: str):
if not log_line:
return
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(log_line)
elif level_tag == 'debug':
l.debug(log_line)
elif level_tag == 'info':
l.info(log_line)
elif level_tag == 'warn':
l.warning(log_line)
elif level_tag == 'error':
l.error(log_line)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
# TODO follow pattern from above ^^
FILE_LOGGER = structlog.wrap_logger(
logger=logging.Logger('plaintext file logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper("%H:%M:%S"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
]
)
def send_exc_to_logger(
l: Logger,
level_tag: str,
log_line: str,
exc_info=True,
stack_info=False,
extra=False
):
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'debug':
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'info':
l.info(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'warn':
l.warning(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'error':
l.error(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.dev.ConsoleRenderer(colors=False),
)
handler = logging.handlers.WatchedFileHandler(filename=log_dest)
handler.setFormatter(formatter)
FILE_LOGGER.addHandler(handler)
# top-level method for accessing the new eventing system
@@ -311,66 +164,55 @@ def send_exc_to_logger(
# (i.e. - mutating the event history, printing to stdout, logging
# to files, etc.)
def fire_event(e: Event) -> None:
# skip logs when `--log-cache-events` is not passed
if isinstance(e, Cache) and not flags.LOG_CACHE_EVENTS:
return
# if and only if the event history deque will be completely filled by this event
# fire warning that old events are now being dropped
global EVENT_HISTORY
if len(EVENT_HISTORY) == (flags.EVENT_BUFFER_SIZE - 1):
EVENT_HISTORY.append(e)
fire_event(EventBufferFull())
else:
EVENT_HISTORY.append(e)
# backwards compatibility for plugins that require old logger (dbt-rpc)
if flags.ENABLE_LEGACY_LOGGER:
# using Event::message because the legacy logger didn't differentiate messages by
# destination
log_line = create_log_line(e, msg_fn=lambda x: x.message())
if log_line:
send_to_logger(GLOBAL_LOGGER, e.level_tag(), log_line)
return # exit the function to avoid using the current logger as well
# always logs debug level regardless of user input
if isinstance(e, File):
log_line = create_log_line(e, msg_fn=lambda x: x.file_msg(), file_output=True)
# doesn't send exceptions to exception logger
if log_line:
send_to_logger(FILE_LOG, level_tag=e.level_tag(), log_line=log_line)
if isinstance(e, Cli):
# explicitly checking the debug flag here so that potentially expensive-to-construct
# log messages are not constructed if debug messages are never shown.
if e.level_tag() == 'debug' and not flags.DEBUG:
return # eat the message in case it was one of the expensive ones
log_line = create_log_line(e, msg_fn=lambda x: x.cli_msg())
if log_line:
if not isinstance(e, ShowException):
send_to_logger(STDOUT_LOG, level_tag=e.level_tag(), log_line=log_line)
# CliEventABC and ShowException
EVENT_HISTORY.append(e)
level_tag = e.level_tag()
if isinstance(e, CliEventABC):
log_line = e.cli_msg()
if isinstance(e, ShowException):
event_dict = {
'exc_info': e.exc_info,
'stack_info': e.stack_info,
'extra': e.extra
}
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
STDOUT_LOGGER.debug(log_line, event_dict)
FILE_LOGGER.debug(log_line, event_dict)
elif level_tag == 'debug':
STDOUT_LOGGER.debug(log_line, event_dict)
FILE_LOGGER.debug(log_line, event_dict)
elif level_tag == 'info':
STDOUT_LOGGER.info(log_line, event_dict)
FILE_LOGGER.info(log_line, event_dict)
elif level_tag == 'warn':
STDOUT_LOGGER.warning(log_line, event_dict)
FILE_LOGGER.warning(log_line, event_dict)
elif level_tag == 'error':
STDOUT_LOGGER.error(log_line, event_dict)
FILE_LOGGER.error(log_line, event_dict)
else:
send_exc_to_logger(
STDOUT_LOG,
level_tag=e.level_tag(),
log_line=log_line,
exc_info=e.exc_info,
stack_info=e.stack_info,
extra=e.extra
raise AssertionError(
f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}"
)
# CliEventABC but not ShowException
else:
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
STDOUT_LOGGER.debug(log_line)
FILE_LOGGER.debug(log_line)
elif level_tag == 'debug':
STDOUT_LOGGER.debug(log_line)
FILE_LOGGER.debug(log_line)
elif level_tag == 'info':
STDOUT_LOGGER.info(log_line)
FILE_LOGGER.info(log_line)
elif level_tag == 'warn':
STDOUT_LOGGER.warning(log_line)
FILE_LOGGER.warning(log_line)
elif level_tag == 'error':
STDOUT_LOGGER.error(log_line)
FILE_LOGGER.error(log_line)
else:
raise AssertionError(
f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}"
)
def get_invocation_id() -> str:
global invocation_id
if invocation_id is None:
invocation_id = str(uuid.uuid4())
return invocation_id
def set_invocation_id() -> None:
# This is primarily for setting the invocation_id for separate
# commands in the dbt servers. It shouldn't be necessary for the CLI.
global invocation_id
invocation_id = str(uuid.uuid4())
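A minimal end-to-end usage sketch of the entry points in this module (the log path and the event are illustrative; UnitTestInfo is one of the test events defined later in this diff, and its module path is an assumption):

```python
from dbt.events.functions import setup_event_logger, fire_event, get_invocation_id
from dbt.events.test_types import UnitTestInfo  # assumed module path

setup_event_logger(log_path="logs")  # wires up the stdout and file loggers configured above
fire_event(UnitTestInfo(msg=f"starting invocation {get_invocation_id()}"))
```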

View File

@@ -0,0 +1,7 @@
from dbt.events.types import Event
from typing import List
# the global history of events for this session
# TODO this is naive and the memory footprint is likely far too large.
EVENT_HISTORY: List[Event] = []

View File

@@ -1,72 +0,0 @@
from typing import (
Any,
List,
NamedTuple,
Optional,
Dict,
)
# N.B.:
# These stubs were autogenerated by stubgen and then hacked
# to pieces to ensure we had something other than "Any" types
# where we use external classes to instantiate event subclasses
# in events/types.py.
#
# This goes away when we turn mypy on for everything.
#
# Don't trust them too much at all!
class _ReferenceKey(NamedTuple):
database: Any
schema: Any
identifier: Any
class _CachedRelation:
referenced_by: Any
inner: Any
class BaseRelation:
path: Any
type: Optional[Any]
quote_character: str
include_policy: Any
quote_policy: Any
dbt_created: bool
class InformationSchema(BaseRelation):
information_schema_view: Optional[str]
class CompiledNode():
compiled_sql: Optional[str]
extra_ctes_injected: bool
extra_ctes: List[Any]
relation_name: Optional[str]
class CompiledModelNode(CompiledNode):
resource_type: Any
class ParsedModelNode():
resource_type: Any
class ParsedHookNode():
resource_type: Any
index: Optional[int]
class RunResult():
status: str
timing: List[Any]
thread_id: str
execution_time: float
adapter_response: Dict[str, Any]
message: Optional[str]
failures: Optional[int]
node: Any

View File

@@ -1,81 +0,0 @@
from dataclasses import dataclass
from .types import (
InfoLevel,
DebugLevel,
WarnLevel,
ErrorLevel,
ShowException,
Cli
)
# Keeping log messages for testing separate since they are used for debugging.
# Reuse the existing messages when adding logs to tests.
@dataclass
class IntegrationTestInfo(InfoLevel, Cli):
msg: str
code: str = "T001"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestDebug(DebugLevel, Cli):
msg: str
code: str = "T002"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestWarn(WarnLevel, Cli):
msg: str
code: str = "T003"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestError(ErrorLevel, Cli):
msg: str
code: str = "T004"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestException(ShowException, ErrorLevel, Cli):
msg: str
code: str = "T005"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class UnitTestInfo(InfoLevel, Cli):
msg: str
code: str = "T006"
def message(self) -> str:
return f"Unit Test: {self.msg}"
# since mypy doesn't run on every file we need to suggest to mypy that every
# class gets instantiated. But we don't actually want to run this code.
# making the conditional `if False` causes mypy to skip it as dead code so
# we need to skirt around that by computing something it doesn't check statically.
#
# TODO remove these lines once we run mypy everywhere.
if 1 == 0:
IntegrationTestInfo(msg='')
IntegrationTestDebug(msg='')
IntegrationTestWarn(msg='')
IntegrationTestError(msg='')
IntegrationTestException(msg='')
UnitTestInfo(msg='')
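As a quick illustration of how these dataclasses behave (the message values are hypothetical):

```python
# each event just formats its msg field into a prefixed, human-readable message
IntegrationTestDebug(msg="creating fixture schema").message()
# -> "Integration Test: creating fixture schema"

UnitTestInfo(msg="checking flag defaults").message()
# -> "Unit Test: checking flag defaults"
```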

File diff suppressed because it is too large

View File

@@ -2,9 +2,7 @@ import builtins
import functools
from typing import NoReturn, Optional, Mapping, Any
from dbt.logger import get_secret_env
from dbt.events.functions import fire_event
from dbt.events.types import GeneralWarningMsg, GeneralWarningException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt import flags
from dbt.ui import line_wrap_message, warning_tag
@@ -244,15 +242,6 @@ class ValidationException(RuntimeException):
MESSAGE = "Validation Error"
class ParsingException(RuntimeException):
CODE = 10015
MESSAGE = "Parsing Error"
@property
def type(self):
return 'Parsing'
class JSONValidationException(ValidationException):
def __init__(self, typename, errors):
self.typename = typename
@@ -401,8 +390,6 @@ class CommandError(RuntimeException):
super().__init__(message)
self.cwd = cwd
self.cmd = cmd
for secret in get_secret_env():
self.cmd = str(self.cmd).replace(secret, "*****")
self.args = (cwd, cmd, message)
def __str__(self):
@@ -457,10 +444,6 @@ def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationException(msg, node)
def raise_parsing_error(msg, node=None) -> NoReturn:
raise ParsingException(msg, node)
def raise_database_error(msg, node=None) -> NoReturn:
raise DatabaseException(msg, node)
@@ -469,14 +452,6 @@ def raise_dependency_error(msg) -> NoReturn:
raise DependencyException(msg)
def disallow_secret_env_var(env_var_name) -> NoReturn:
"""Raise an error when a secret env var is referenced outside allowed
rendering contexts"""
msg = ("Secret env vars are allowed only in profiles.yml or packages.yml. "
"Found '{env_var_name}' referenced elsewhere.")
raise_parsing_error(msg.format(env_var_name=env_var_name))
def invalid_type_error(method_name, arg_name, got_value, expected_type,
version='0.13.0') -> NoReturn:
"""Raise a CompilationException when an adapter method available to macros
@@ -1001,14 +976,19 @@ def warn_or_error(msg, node=None, log_fmt=None):
if flags.WARN_ERROR:
raise_compiler_error(msg, node)
else:
fire_event(GeneralWarningMsg(msg=msg, log_fmt=log_fmt))
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
def warn_or_raise(exc, log_fmt=None):
if flags.WARN_ERROR:
raise exc
else:
fire_event(GeneralWarningException(exc=exc, log_fmt=log_fmt))
msg = str(exc)
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
def warn(msg, node=None):
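For context on how call sites use this pair, a hedged usage sketch (the warning text is illustrative):

```python
from dbt.exceptions import warn_or_error

# With flags.WARN_ERROR unset this logs/fires a warning (GeneralWarningMsg on the event
# code path); with --warn-error set it raises a CompilationException instead.
warn_or_error("Configuration paths exist in your dbt_project.yml that do not apply to any resources.")
```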

View File

@@ -17,6 +17,7 @@ PROFILES_DIR = os.path.expanduser(
STRICT_MODE = False # Only here for backwards compatibility
FULL_REFRESH = False # subcommand
STORE_FAILURES = False # subcommand
GREEDY = None # subcommand
# Global CLI commands
USE_EXPERIMENTAL_PARSER = None
@@ -32,9 +33,6 @@ FAIL_FAST = None
SEND_ANONYMOUS_USAGE_STATS = None
PRINTER_WIDTH = 80
WHICH = None
INDIRECT_SELECTION = None
LOG_CACHE_EVENTS = None
EVENT_BUFFER_SIZE = 100000
# Global CLI defaults. These flags are set from three places:
# CLI args, environment variables, and user_config (profiles.yml).
@@ -52,10 +50,7 @@ flag_defaults = {
"VERSION_CHECK": True,
"FAIL_FAST": False,
"SEND_ANONYMOUS_USAGE_STATS": True,
"PRINTER_WIDTH": 80,
"INDIRECT_SELECTION": 'eager',
"LOG_CACHE_EVENTS": False,
"EVENT_BUFFER_SIZE": 100000
"PRINTER_WIDTH": 80
}
@@ -86,7 +81,6 @@ def env_set_path(key: str) -> Optional[Path]:
MACRO_DEBUGGING = env_set_truthy('DBT_MACRO_DEBUGGING')
DEFER_MODE = env_set_truthy('DBT_DEFER_TO_STATE')
ARTIFACT_STATE_PATH = env_set_path('DBT_ARTIFACT_STATE_PATH')
ENABLE_LEGACY_LOGGER = env_set_truthy('DBT_ENABLE_LEGACY_LOGGER')
def _get_context():
@@ -101,14 +95,15 @@ MP_CONTEXT = _get_context()
def set_from_args(args, user_config):
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, INDIRECT_SELECTION, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH, \
WHICH, LOG_CACHE_EVENTS, EVENT_BUFFER_SIZE
WHICH
STRICT_MODE = False # backwards compatibility
# cli args without user_config or env var option
FULL_REFRESH = getattr(args, 'full_refresh', FULL_REFRESH)
STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
GREEDY = getattr(args, 'greedy', GREEDY)
WHICH = getattr(args, 'which', WHICH)
# global cli flags with env var and user_config alternatives
@@ -125,9 +120,6 @@ def set_from_args(args, user_config):
FAIL_FAST = get_flag_value('FAIL_FAST', args, user_config)
SEND_ANONYMOUS_USAGE_STATS = get_flag_value('SEND_ANONYMOUS_USAGE_STATS', args, user_config)
PRINTER_WIDTH = get_flag_value('PRINTER_WIDTH', args, user_config)
INDIRECT_SELECTION = get_flag_value('INDIRECT_SELECTION', args, user_config)
LOG_CACHE_EVENTS = get_flag_value('LOG_CACHE_EVENTS', args, user_config)
EVENT_BUFFER_SIZE = get_flag_value('EVENT_BUFFER_SIZE', args, user_config)
def get_flag_value(flag, args, user_config):
@@ -140,13 +132,7 @@ def get_flag_value(flag, args, user_config):
if env_value is not None and env_value != '':
env_value = env_value.lower()
# non Boolean values
if flag in [
'LOG_FORMAT',
'PRINTER_WIDTH',
'PROFILES_DIR',
'INDIRECT_SELECTION',
'EVENT_BUFFER_SIZE'
]:
if flag in ['LOG_FORMAT', 'PRINTER_WIDTH', 'PROFILES_DIR']:
flag_value = env_value
else:
flag_value = env_set_bool(env_value)
@@ -154,7 +140,7 @@ def get_flag_value(flag, args, user_config):
flag_value = getattr(user_config, lc_flag)
else:
flag_value = flag_defaults[flag]
if flag in ['PRINTER_WIDTH', 'EVENT_BUFFER_SIZE']: # must be ints
if flag == 'PRINTER_WIDTH': # printer_width must be an int or it hangs
flag_value = int(flag_value)
if flag == 'PROFILES_DIR':
flag_value = os.path.abspath(flag_value)
@@ -177,7 +163,4 @@ def get_flag_dict():
"fail_fast": FAIL_FAST,
"send_anonymous_usage_stats": SEND_ANONYMOUS_USAGE_STATS,
"printer_width": PRINTER_WIDTH,
"indirect_selection": INDIRECT_SELECTION,
"log_cache_events": LOG_CACHE_EVENTS,
"event_buffer_size": EVENT_BUFFER_SIZE
}
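A simplified, standalone sketch of the lookup that get_flag_value performs for a single flag. Only fragments of the real function appear in this diff, so the CLI-args-first step and the DBT_<FLAG> environment variable naming are assumptions; boolean coercion and the PRINTER_WIDTH/PROFILES_DIR special cases shown above are omitted:

```python
import os

def resolve_flag(flag: str, args, user_config, defaults: dict):
    # 1. CLI argument (attribute named after the lowercased flag)
    lc_flag = flag.lower()
    cli_value = getattr(args, lc_flag, None)
    if cli_value is not None:
        return cli_value
    # 2. environment variable (assumed DBT_<FLAG> naming)
    env_value = os.getenv(f"DBT_{flag}")
    if env_value is not None and env_value != "":
        return env_value
    # 3. user_config from profiles.yml
    if user_config is not None and getattr(user_config, lc_flag, None) is not None:
        return getattr(user_config, lc_flag)
    # 4. built-in default
    return defaults[flag]
```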

View File

@@ -16,18 +16,16 @@ from .selector_spec import (
SelectionIntersection,
SelectionDifference,
SelectionCriteria,
IndirectSelection
)
INTERSECTION_DELIMITER = ','
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*', 'metric:*']
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*']
DEFAULT_EXCLUDES: List[str] = []
def parse_union(
components: List[str], expect_exists: bool,
indirect_selection: IndirectSelection = IndirectSelection.Eager
components: List[str], expect_exists: bool, greedy: bool = False
) -> SelectionUnion:
# turn ['a b', 'c'] -> ['a', 'b', 'c']
raw_specs = itertools.chain.from_iterable(
@@ -38,7 +36,7 @@ def parse_union(
# ['a', 'b', 'c,d'] -> union('a', 'b', intersection('c', 'd'))
for raw_spec in raw_specs:
intersection_components: List[SelectionSpec] = [
SelectionCriteria.from_single_spec(part, indirect_selection=indirect_selection)
SelectionCriteria.from_single_spec(part, greedy=greedy)
for part in raw_spec.split(INTERSECTION_DELIMITER)
]
union_components.append(SelectionIntersection(
@@ -54,36 +52,21 @@ def parse_union(
def parse_union_from_default(
raw: Optional[List[str]], default: List[str],
indirect_selection: IndirectSelection = IndirectSelection.Eager
raw: Optional[List[str]], default: List[str], greedy: bool = False
) -> SelectionUnion:
components: List[str]
expect_exists: bool
if raw is None:
return parse_union(
components=default,
expect_exists=False,
indirect_selection=indirect_selection)
return parse_union(components=default, expect_exists=False, greedy=greedy)
else:
return parse_union(
components=raw,
expect_exists=True,
indirect_selection=indirect_selection)
return parse_union(components=raw, expect_exists=True, greedy=greedy)
def parse_difference(
include: Optional[List[str]], exclude: Optional[List[str]]
) -> SelectionDifference:
included = parse_union_from_default(
include,
DEFAULT_INCLUDES,
indirect_selection=IndirectSelection(flags.INDIRECT_SELECTION)
)
excluded = parse_union_from_default(
exclude,
DEFAULT_EXCLUDES,
indirect_selection=IndirectSelection.Eager)
included = parse_union_from_default(include, DEFAULT_INCLUDES, greedy=bool(flags.GREEDY))
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
return SelectionDifference(components=[included, excluded])
@@ -165,7 +148,7 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
union_def_parts = _get_list_dicts(definition, 'union')
include, exclude = _parse_include_exclude_subdefs(union_def_parts)
union = SelectionUnion(components=include)
union = SelectionUnion(components=include, greedy_warning=False)
if exclude is None:
union.raw = definition
@@ -173,7 +156,8 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
else:
return SelectionDifference(
components=[union, exclude],
raw=definition
raw=definition,
greedy_warning=False
)
@@ -182,7 +166,7 @@ def parse_intersection_definition(
) -> SelectionSpec:
intersection_def_parts = _get_list_dicts(definition, 'intersection')
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
intersection = SelectionIntersection(components=include)
intersection = SelectionIntersection(components=include, greedy_warning=False)
if exclude is None:
intersection.raw = definition
@@ -190,7 +174,8 @@ def parse_intersection_definition(
else:
return SelectionDifference(
components=[intersection, exclude],
raw=definition
raw=definition,
greedy_warning=False
)
@@ -224,7 +209,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
if diff_arg is None:
return base
else:
return SelectionDifference(components=[base, diff_arg])
return SelectionDifference(components=[base, diff_arg], greedy_warning=False)
def parse_from_definition(
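To make the shapes above concrete, a hedged sketch of how an include/exclude pair is parsed, mirroring the ['a', 'b', 'c,d'] example in the comments:

```python
# roughly: `--select a c,d --exclude b`
spec = parse_difference(["a", "c,d"], ["b"])
# -> SelectionDifference(components=[
#        SelectionUnion(
#            SelectionIntersection(criteria("a")),
#            SelectionIntersection(criteria("c"), criteria("d")),
#        ),
#        SelectionUnion(SelectionIntersection(criteria("b"))),
#    ])
# where each criteria(...) stands for a SelectionCriteria built via from_single_spec
```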

View File

@@ -1,7 +1,6 @@
from typing import (
Set, Iterable, Iterator, Optional, NewType
)
from itertools import product
import networkx as nx # type: ignore
from dbt.exceptions import InternalException
@@ -78,26 +77,17 @@ class Graph:
successors.update(self.graph.successors(node))
return successors
def get_subset_graph(self, selected: Iterable[UniqueId]) -> "Graph":
def get_subset_graph(self, selected: Iterable[UniqueId]) -> 'Graph':
"""Create and return a new graph that is a shallow copy of the graph,
but with only the nodes in include_nodes. Transitive edges across
removed nodes are preserved as explicit new edges.
"""
new_graph = nx.algorithms.transitive_closure(self.graph)
new_graph = self.graph.copy()
include_nodes = set(selected)
for node in self:
if node not in include_nodes:
source_nodes = [x for x, _ in new_graph.in_edges(node)]
target_nodes = [x for _, x in new_graph.out_edges(node)]
new_edges = product(source_nodes, target_nodes)
non_cyclic_new_edges = [
(source, target) for source, target in new_edges if source != target
] # removes cyclic refs
new_graph.add_edges_from(non_cyclic_new_edges)
new_graph.remove_node(node)
for node in include_nodes:
@@ -106,7 +96,6 @@ class Graph:
"Couldn't find model '{}' -- does it exist or is "
"it disabled?".format(node)
)
return Graph(new_graph)
def subgraph(self, nodes: Iterable[UniqueId]) -> 'Graph':
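One side of this diff preserves lineage by re-wiring edges around dropped nodes (the product of in- and out-edges); a small self-contained networkx sketch of that idea, outside of dbt:

```python
import networkx as nx
from itertools import product

g = nx.DiGraph()
g.add_edges_from([("a", "b"), ("b", "c")])  # a -> b -> c

selected = {"a", "c"}
subset = g.copy()
for node in list(subset.nodes):
    if node in selected:
        continue
    sources = [s for s, _ in subset.in_edges(node)]
    targets = [t for _, t in subset.out_edges(node)]
    # connect every upstream node to every downstream node, skipping self-loops
    subset.add_edges_from((s, t) for s, t in product(sources, targets) if s != t)
    subset.remove_node(node)

print(list(subset.edges()))  # [('a', 'c')] -- the transitive dependency is kept
```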

View File

@@ -5,7 +5,7 @@ from queue import PriorityQueue
from typing import Dict, Set, List, Generator, Optional
from .graph import UniqueId
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure, ParsedMetric
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure
from dbt.contracts.graph.compiled import GraphMemberNode
from dbt.contracts.graph.manifest import Manifest
from dbt.node_types import NodeType
@@ -47,8 +47,8 @@ class GraphQueue:
node = self.manifest.expect(node_id)
if node.resource_type != NodeType.Model:
return False
# must be a Model - tell mypy this won't be a Source or Exposure or Metric
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure, ParsedMetric))
# must be a Model - tell mypy this won't be a Source or Exposure
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure))
if node.is_ephemeral:
return False
return True

View File

@@ -3,10 +3,12 @@ from typing import Set, List, Optional, Tuple
from .graph import Graph, UniqueId
from .queue import GraphQueue
from .selector_methods import MethodManager
from .selector_spec import SelectionCriteria, SelectionSpec, IndirectSelection
from .selector_spec import SelectionCriteria, SelectionSpec
from dbt.events.functions import fire_event
from dbt.events.types import SelectorReportInvalidSelector
from dbt.events.types import (
SelectorAlertUpto3UnusedNodes, SelectorAlertAllUnusedNodes, SelectorReportInvalidSelector
)
from dbt.node_types import NodeType
from dbt.exceptions import (
InternalException,
@@ -30,6 +32,12 @@ def alert_non_existence(raw_spec, nodes):
)
def alert_unused_nodes(raw_spec, node_names):
fire_event(SelectorAlertUpto3UnusedNodes(node_names=node_names))
if len(node_names) > 4:
fire_event(SelectorAlertAllUnusedNodes(node_names=node_names))
def can_select_indirectly(node):
"""If a node is not selected itself, but its parent(s) are, it may qualify
for indirect selection.
@@ -96,7 +104,7 @@ class NodeSelector(MethodManager):
neighbors = self.collect_specified_neighbors(spec, collected)
direct_nodes, indirect_nodes = self.expand_selection(
selected=(collected | neighbors),
indirect_selection=spec.indirect_selection
greedy=spec.greedy
)
return direct_nodes, indirect_nodes
@@ -168,8 +176,6 @@ class NodeSelector(MethodManager):
return source.config.enabled
elif unique_id in self.manifest.exposures:
return True
elif unique_id in self.manifest.metrics:
return True
node = self.manifest.nodes[unique_id]
return not node.empty and node.config.enabled
@@ -187,8 +193,6 @@ class NodeSelector(MethodManager):
node = self.manifest.sources[unique_id]
elif unique_id in self.manifest.exposures:
node = self.manifest.exposures[unique_id]
elif unique_id in self.manifest.metrics:
node = self.manifest.metrics[unique_id]
else:
raise InternalException(
f'Node {unique_id} not found in the manifest!'
@@ -204,21 +208,21 @@ class NodeSelector(MethodManager):
}
def expand_selection(
self, selected: Set[UniqueId],
indirect_selection: IndirectSelection = IndirectSelection.Eager
self, selected: Set[UniqueId], greedy: bool = False
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
# Test selection by default expands to include implicitly/indirectly selected tests.
# `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, EAGER and CAUTIOUS.
# Test selection can expand to include an implicitly/indirectly selected test.
# In this way, `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, GREEDY and NOT GREEDY.
#
# EAGER mode: If ANY parent is selected, select the test.
# GREEDY mode: If ANY parent is selected, select the test. We use this for EXCLUSION.
#
# CAUTIOUS mode:
# NOT GREEDY mode:
# - If ALL parents are selected, select the test.
# - If ANY parent is missing, return it separately. We'll keep it around
# for later and see if its other parents show up.
# Users can opt out of inclusive EAGER mode by passing the --indirect-selection cautious
# CLI argument or by specifying `indirect_selection: cautious` in a yaml selector
# We use this for INCLUSION.
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
# or by specifying `greedy: true` in a yaml selector
direct_nodes = set(selected)
indirect_nodes = set()
@@ -228,10 +232,7 @@ class NodeSelector(MethodManager):
node = self.manifest.nodes[unique_id]
if can_select_indirectly(node):
# should we add it in directly?
if (
indirect_selection == IndirectSelection.Eager or
set(node.depends_on.nodes) <= set(selected)
):
if greedy or set(node.depends_on.nodes) <= set(selected):
direct_nodes.add(unique_id)
# if not:
else:
@@ -245,10 +246,6 @@ class NodeSelector(MethodManager):
# Check tests previously selected indirectly to see if ALL their
# parents are now present.
# performance: if identical, skip the processing below
if set(direct_nodes) == set(indirect_nodes):
return direct_nodes
selected = set(direct_nodes)
for unique_id in indirect_nodes:
@@ -272,6 +269,16 @@ class NodeSelector(MethodManager):
selected_nodes, indirect_only = self.select_nodes(spec)
filtered_nodes = self.filter_selection(selected_nodes)
if indirect_only:
filtered_unused_nodes = self.filter_selection(indirect_only)
if filtered_unused_nodes and spec.greedy_warning:
# log anything that didn't make the cut
unused_node_names = []
for unique_id in filtered_unused_nodes:
name = self.manifest.nodes[unique_id].name
unused_node_names.append(name)
alert_unused_nodes(spec, unused_node_names)
return filtered_nodes
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
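A condensed sketch of the decision rule described in the comments above, written as a standalone helper (names are illustrative; the real method also defers tests with missing parents to a second pass):

```python
def should_select_test(mode: str, test_parents: set, selected: set) -> bool:
    if mode == "eager":        # eager/greedy: any selected parent pulls the test in
        return bool(test_parents & selected)
    return test_parents <= selected  # cautious: every parent must already be selected

should_select_test("eager", {"model_a", "model_b"}, {"model_a"})     # True
should_select_test("cautious", {"model_a", "model_b"}, {"model_a"})  # False
```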

View File

@@ -18,7 +18,6 @@ from dbt.contracts.graph.parsed import (
HasTestMetadata,
ParsedSingularTestNode,
ParsedExposure,
ParsedMetric,
ParsedGenericTestNode,
ParsedSourceDefinition,
)
@@ -46,7 +45,6 @@ class MethodName(StrEnum):
ResourceType = 'resource_type'
State = 'state'
Exposure = 'exposure'
Metric = 'metric'
Result = 'result'
@@ -74,7 +72,7 @@ def is_selected_node(fqn: List[str], node_selector: str):
return True
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure, ParsedMetric]
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure]
class SelectorMethod(metaclass=abc.ABCMeta):
@@ -121,25 +119,13 @@ class SelectorMethod(metaclass=abc.ABCMeta):
continue
yield unique_id, exposure
def metric_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ParsedMetric]]:
for key, metric in self.manifest.metrics.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, metric
def all_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
yield from chain(self.parsed_nodes(included_nodes),
self.source_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
self.exposure_nodes(included_nodes))
def configurable_nodes(
self,
@@ -151,10 +137,9 @@ class SelectorMethod(metaclass=abc.ABCMeta):
def non_source_nodes(
self,
included_nodes: Set[UniqueId],
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode, ParsedMetric]]]:
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode]]]:
yield from chain(self.parsed_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
self.exposure_nodes(included_nodes))
@abc.abstractmethod
def search(
@@ -266,33 +251,6 @@ class ExposureSelectorMethod(SelectorMethod):
yield node
class MetricSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
parts = selector.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_name = parts[0]
elif len(parts) == 2:
target_package, target_name = parts
else:
msg = (
'Invalid metric selector value "{}". Metrics must be of '
'the form ${{metric_name}} or '
'${{metric_package.metric_name}}'
).format(selector)
raise RuntimeException(msg)
for node, real_node in self.metric_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_name not in (real_node.name, SELECTOR_GLOB):
continue
yield node
class PathSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
@@ -554,8 +512,6 @@ class StateSelectorMethod(SelectorMethod):
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
if checker(previous_node, real_node):
yield node
@@ -588,10 +544,8 @@ class MethodManager:
MethodName.Config: ConfigSelectorMethod,
MethodName.TestName: TestNameSelectorMethod,
MethodName.TestType: TestTypeSelectorMethod,
MethodName.ResourceType: ResourceTypeSelectorMethod,
MethodName.State: StateSelectorMethod,
MethodName.Exposure: ExposureSelectorMethod,
MethodName.Metric: MetricSelectorMethod,
MethodName.Result: ResultSelectorMethod,
}

View File

@@ -2,7 +2,6 @@ import os
import re
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from dbt.dataclass_schema import StrEnum
from typing import (
Set, Iterator, List, Optional, Dict, Union, Any, Iterable, Tuple
@@ -23,11 +22,6 @@ RAW_SELECTOR_PATTERN = re.compile(
SELECTOR_METHOD_SEPARATOR = '.'
class IndirectSelection(StrEnum):
Eager = 'eager'
Cautious = 'cautious'
def _probably_path(value: str):
"""Decide if value is probably a path. Windows has two path separators, so
we should check both sep ('\\') and altsep ('/') there.
@@ -72,7 +66,8 @@ class SelectionCriteria:
parents_depth: Optional[int]
children: bool
children_depth: Optional[int]
indirect_selection: IndirectSelection = IndirectSelection.Eager
greedy: bool = False
greedy_warning: bool = False # do not raise warning for yaml selectors
def __post_init__(self):
if self.children and self.childrens_parents:
@@ -110,8 +105,7 @@ class SelectionCriteria:
@classmethod
def selection_criteria_from_dict(
cls, raw: Any, dct: Dict[str, Any],
indirect_selection: IndirectSelection = IndirectSelection.Eager
cls, raw: Any, dct: Dict[str, Any], greedy: bool = False
) -> 'SelectionCriteria':
if 'value' not in dct:
raise RuntimeException(
@@ -121,12 +115,6 @@ class SelectionCriteria:
parents_depth = _match_to_int(dct, 'parents_depth')
children_depth = _match_to_int(dct, 'children_depth')
# If defined field in selector, override CLI flag
indirect_selection = IndirectSelection(
dct.get('indirect_selection', None) or indirect_selection
)
return cls(
raw=raw,
method=method_name,
@@ -137,7 +125,7 @@ class SelectionCriteria:
parents_depth=parents_depth,
children=bool(dct.get('children')),
children_depth=children_depth,
indirect_selection=indirect_selection
greedy=(greedy or bool(dct.get('greedy'))),
)
@classmethod
@@ -149,7 +137,7 @@ class SelectionCriteria:
method_name, method_arguments = cls.parse_method(dct)
meth_name = str(method_name)
if method_arguments:
meth_name += '.' + '.'.join(method_arguments)
meth_name = meth_name + '.' + '.'.join(method_arguments)
dct['method'] = meth_name
dct = {k: v for k, v in dct.items() if (v is not None and v != '')}
if 'childrens_parents' in dct:
@@ -158,23 +146,18 @@ class SelectionCriteria:
dct['parents'] = bool(dct.get('parents'))
if 'children' in dct:
dct['children'] = bool(dct.get('children'))
if 'greedy' in dct:
dct['greedy'] = bool(dct.get('greedy'))
return dct
@classmethod
def from_single_spec(
cls, raw: str,
indirect_selection: IndirectSelection = IndirectSelection.Eager
) -> 'SelectionCriteria':
def from_single_spec(cls, raw: str, greedy: bool = False) -> 'SelectionCriteria':
result = RAW_SELECTOR_PATTERN.match(raw)
if result is None:
# bad spec!
raise RuntimeException(f'Invalid selector spec "{raw}"')
return cls.selection_criteria_from_dict(
raw,
result.groupdict(),
indirect_selection=indirect_selection
)
return cls.selection_criteria_from_dict(raw, result.groupdict(), greedy=greedy)
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
@@ -182,10 +165,12 @@ class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
self,
components: Iterable[SelectionSpec],
expect_exists: bool = False,
greedy_warning: bool = True,
raw: Any = None,
):
self.components: List[SelectionSpec] = list(components)
self.expect_exists = expect_exists
self.greedy_warning = greedy_warning
self.raw = raw
def __iter__(self) -> Iterator[SelectionSpec]:

View File

@@ -2,6 +2,5 @@ config-version: 2
name: dbt
version: 1.0
docs-paths: ["docs"]
docs-paths: ['docs']
macro-paths: ["macros"]
test-paths: ["tests"]

View File

@@ -26,7 +26,7 @@ On model pages, you'll see the immediate parents and children of the model you'r
button at the top-right of this lineage pane, you'll be able to see all of the models that are used to build,
or are built from, the model you're exploring.
Once expanded, you'll be able to use the `--select` and `--exclude` model selection syntax to filter the
Once expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).
Note that you can also right-click on models to interactively filter and explore the graph.
@@ -35,9 +35,9 @@ Note that you can also right-click on models to interactively filter and explore
### More information
- [What is dbt](https://docs.getdbt.com/docs/introduction)?
- [What is dbt](https://docs.getdbt.com/docs/overview)?
- Read the [dbt viewpoint](https://docs.getdbt.com/docs/viewpoint)
- [Installation](https://docs.getdbt.com/docs/installation)
- Join the [dbt Community](https://www.getdbt.com/community/) for questions and discussion
- Join the [chat](https://community.getdbt.com/) on Slack for live questions and support.
{% enddocs %}

View File

@@ -1,89 +0,0 @@
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{# helper for adapter-specific implementations of get_columns_in_relation #}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if add_columns and remove_columns }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}

View File

@@ -0,0 +1,344 @@
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}
{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, sql) }}
{%- endmacro %}
{% macro default__create_table_as(temporary, relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
as (
{{ sql }}
);
{% endmacro %}
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro create_view_as(relation, sql) -%}
{{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}
{%- endmacro %}
{% macro default__create_view_as(relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create view {{ relation }} as (
{{ sql }}
);
{% endmacro %}
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}
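As an illustration, for a hypothetical source raw.orders with loaded_at_field set to _etl_loaded_at and a filter of _etl_loaded_at > current_date - 3, the freshness query renders along these lines (now() stands in for the adapter-specific current_timestamp()):

    -- illustrative output; source and column names are placeholders
    select
        max(_etl_loaded_at) as max_loaded_at,
        now() as snapshotted_at
    from raw.orders
    where _etl_loaded_at > current_date - 3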
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
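A minimal usage sketch from inside a materialization, where this is the model's target relation (the create_table_as call is shown only as an assumed companion macro):

    {# sketch; assumes the standard materialization context #}
    {% set tmp_relation = make_temp_relation(this) %}
    {# same database/schema as this, identifier suffixed with __dbt_tmp #}
    {% call statement('create_tmp') %}
        {{ create_table_as(True, tmp_relation, sql) }}
    {% endcall %}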
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if remove_columns | length > 0 }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}
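For a rough sense of the rendered output, given one column to add and one to remove on a hypothetical table (names and types are placeholders), the statement comes out as:

    -- illustrative output only
    alter table analytics.stg_payments
        add column discount_amount integer,
        drop column legacy_flag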

View File

@@ -1,26 +0,0 @@
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}

View File

@@ -1,23 +0,0 @@
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}
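As a sketch of how an adapter plugs into this, the indexes config is a list of dicts, and an adapter package supplies the dispatch target; the adapter prefix, index naming, and exact DDL below are assumptions, not the shipped implementation:

    {# hypothetical model config #}
    {{ config(indexes=[{"columns": ["order_id"], "unique": true}]) }}

    {# hypothetical adapter override picked up by adapter.dispatch #}
    {% macro myadapter__get_create_index_sql(relation, index_dict) -%}
        create {% if index_dict.get('unique') %}unique {% endif %}index
            {{ relation.identifier }}_{{ index_dict['columns'] | join('_') }}_idx
            on {{ relation }} ({{ index_dict['columns'] | join(', ') }})
    {%- endmacro %}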

View File

@@ -1,65 +0,0 @@
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

View File

@@ -1,33 +0,0 @@
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}

View File

@@ -1,84 +0,0 @@
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro get_or_create_relation(database, schema, identifier, type) -%}
{{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }}
{% endmacro %}
{% macro default__get_or_create_relation(database, schema, identifier, type) %}
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
{% if target_relation %}
{% do return([true, target_relation]) %}
{% endif %}
{%- set new_relation = api.Relation.create(
database=database,
schema=schema,
identifier=identifier,
type=type
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}
{# a user-friendly interface into adapter.get_relation #}
{% macro load_relation(relation) %}
{% do return(adapter.get_relation(
database=relation.database,
schema=relation.schema,
identifier=relation.identifier
)) -%}
{% endmacro %}
{# not used much, here for backwards compatibility #}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}
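A short usage sketch of these helpers plus get_or_create_relation from a custom materialization or run-operation macro (all names illustrative):

    {# sketch; assumes a context where this is defined #}
    {% set existing = load_relation(this) %}
    {% if existing is none %}
        {{ log("target does not exist yet", info=True) }}
    {% endif %}
    {% set found, target_relation = get_or_create_relation(
        this.database, this.schema, this.identifier, 'table') %}

get_or_create_relation returns a [found, relation] pair, so it unpacks as above; load_relation simply returns none when the adapter has no matching relation.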

View File

@@ -1,20 +0,0 @@
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}

View File

@@ -15,7 +15,6 @@
{%- endif -%}
{%- endmacro %}
{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}
{%- set sql = caller() -%}
@@ -29,13 +28,3 @@
{%- endif -%}
{%- endmacro %}
{# a user-friendly interface into statements #}
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}

View File

@@ -1,3 +1,4 @@
{% macro convert_datetime(date_str, date_fmt) %}
{% set error_msg -%}
@@ -9,7 +10,6 @@
{% endmacro %}
{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt="%Y%m%d", out_fmt="%Y%m%d") %}
{% set end_date_str = start_date_str if end_date_str is none else end_date_str %}
@@ -38,7 +38,6 @@
{{ return(date_list) }}
{% endmacro %}
{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}
{% set partition_range = (raw_partition_date | string).split(",") %}
@@ -55,7 +54,6 @@
{{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}
{% endmacro %}
{% macro py_current_timestring() %}
{% set dt = modules.datetime.datetime.now() %}
{% do return(dt.strftime("%Y%m%d%H%M%S%f")) %}

View File

@@ -12,7 +12,6 @@
node: The available node that an alias is being generated for, or none
#}
{% macro generate_alias_name(custom_alias_name=none, node=none) -%}
{% do return(adapter.dispatch('generate_alias_name', 'dbt')(custom_alias_name, node)) %}
{%- endmacro %}

View File

@@ -14,7 +14,7 @@
node: The node the schema is being generated for
#}
{% macro generate_schema_name(custom_schema_name=none, node=none) -%}
{% macro generate_schema_name(custom_schema_name, node) -%}
{{ return(adapter.dispatch('generate_schema_name', 'dbt')(custom_schema_name, node)) }}
{% endmacro %}

View File

@@ -0,0 +1,8 @@
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}
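A typical call site for run_query, for example in a run-operation or analysis (the query and table name are placeholders); the return value is an agate table, so columns can be read directly:

    {% set results = run_query("select distinct status from analytics.orders") %}
    {% if execute %}
        {# guard so nothing is read at parse time #}
        {% set statuses = results.columns[0].values() %}
        {{ log(statuses, info=True) }}
    {% endif %}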

View File

@@ -1,5 +1,5 @@
{% macro get_where_subquery(relation) -%}
{% do return(adapter.dispatch('get_where_subquery', 'dbt')(relation)) %}
{% do return(adapter.dispatch('get_where_subquery')(relation)) %}
{%- endmacro %}
{% macro default__get_where_subquery(relation) -%}

View File

@@ -1,8 +0,0 @@
{% macro default__test_not_null(model, column_name) %}
select *
from {{ model }}
where {{ column_name }} is null
{% endmacro %}

View File

@@ -25,3 +25,9 @@ where value_field not in (
)
{% endmacro %}
{% test accepted_values(model, column_name, values, quote=True) %}
{% set macro = adapter.dispatch('test_accepted_values', 'dbt') %}
{{ macro(model, column_name, values, quote) }}
{% endtest %}

View File

@@ -0,0 +1,13 @@
{% macro default__test_not_null(model, column_name) %}
select *
from {{ model }}
where {{ column_name }} is null
{% endmacro %}
{% test not_null(model, column_name) %}
{% set macro = adapter.dispatch('test_not_null', 'dbt') %}
{{ macro(model, column_name) }}
{% endtest %}
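The dispatch call in the test block is what lets an adapter or package swap in its own SQL; a hedged sketch of such an override, with the adapter prefix purely illustrative:

    {# hypothetical override; resolved by adapter.dispatch('test_not_null', 'dbt') #}
    {% macro myadapter__test_not_null(model, column_name) %}
    select {{ column_name }}
    from {{ model }}
    where {{ column_name }} is null
    {% endmacro %}

A macro named <adapter>__test_not_null takes precedence over default__test_not_null when running against that adapter.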

View File

@@ -1,3 +1,4 @@
{% macro default__test_relationships(model, column_name, to, field) %}
with child as (
@@ -21,3 +22,9 @@ left join parent
where parent.to_field is null
{% endmacro %}
{% test relationships(model, column_name, to, field) %}
{% set macro = adapter.dispatch('test_relationships', 'dbt') %}
{{ macro(model, column_name, to, field) }}
{% endtest %}

View File

@@ -11,3 +11,8 @@ having count(*) > 1
{% endmacro %}
{% test unique(model, column_name) %}
{% set macro = adapter.dispatch('test_unique', 'dbt') %}
{{ macro(model, column_name) }}
{% endtest %}

View File

@@ -1,7 +1,20 @@
{% macro get_merge_sql(target, source, unique_key, dest_columns, predicates=none) -%}
{{ adapter.dispatch('get_merge_sql', 'dbt')(target, source, unique_key, dest_columns, predicates) }}
{%- endmacro %}
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns) }}
{%- endmacro %}
{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}
{{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}
{%- endmacro %}
{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}
{%- set predicates = [] if predicates is none else [] + predicates -%}
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
@@ -39,11 +52,18 @@
{% endmacro %}
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns) }}
{%- endmacro %}
{% macro get_quoted_csv(column_names) %}
{% set quoted = [] %}
{% for col in column_names -%}
{%- do quoted.append(adapter.quote(col)) -%}
{%- endfor %}
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{%- set dest_cols_csv = quoted | join(', ') -%}
{{ return(dest_cols_csv) }}
{% endmacro %}
{% macro common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%}
@@ -63,10 +83,10 @@
{%- endmacro %}
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) }}
{% endmacro %}
{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}
{{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}
{%- endmacro %}
{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}
{%- set predicates = [] if predicates is none else [] + predicates -%}

View File

@@ -1,21 +0,0 @@
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}
{% macro should_full_refresh() %}
{% set config_full_refresh = config.get('full_refresh') %}
{% if config_full_refresh is none %}
{% set config_full_refresh = flags.FULL_REFRESH %}
{% endif %}
{% do return(config_full_refresh) %}
{% endmacro %}
{% macro should_store_failures() %}
{% set config_store_failures = config.get('store_failures') %}
{% if config_store_failures is none %}
{% set config_store_failures = flags.STORE_FAILURES %}
{% endif %}
{% do return(config_store_failures) %}
{% endmacro %}

View File

@@ -0,0 +1,83 @@
{% macro run_hooks(hooks, inside_transaction=True) %}
{% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}
{% if not inside_transaction and loop.first %}
{% call statement(auto_begin=inside_transaction) %}
commit;
{% endcall %}
{% endif %}
{% set rendered = render(hook.get('sql')) | trim %}
{% if (rendered | length) > 0 %}
{% call statement(auto_begin=inside_transaction) %}
{{ rendered }}
{% endcall %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro column_list(columns) %}
{%- for col in columns %}
{{ col.name }} {% if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
{% macro column_list_for_create_table(columns) %}
{%- for col in columns %}
{{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
{% macro make_hook_config(sql, inside_transaction) %}
{{ tojson({"sql": sql, "transaction": inside_transaction}) }}
{% endmacro %}
{% macro before_begin(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro in_transaction(sql) %}
{{ make_hook_config(sql, inside_transaction=True) }}
{% endmacro %}
{% macro after_commit(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}
{% macro load_relation(relation) %}
{% do return(adapter.get_relation(
database=relation.database,
schema=relation.schema,
identifier=relation.identifier
)) -%}
{% endmacro %}
{% macro should_full_refresh() %}
{% set config_full_refresh = config.get('full_refresh') %}
{% if config_full_refresh is none %}
{% set config_full_refresh = flags.FULL_REFRESH %}
{% endif %}
{% do return(config_full_refresh) %}
{% endmacro %}
{% macro should_store_failures() %}
{% set config_store_failures = config.get('store_failures') %}
{% if config_store_failures is none %}
{% set config_store_failures = flags.STORE_FAILURES %}
{% endif %}
{% do return(config_store_failures) %}
{% endmacro %}
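For orientation, these helpers are consumed from model configs and materializations; a rough sketch, with the hook SQL and table name as placeholders:

    {# hypothetical model config; vacuum must run outside the transaction,
       which is what before_begin / after_commit encode #}
    {{ config(
        pre_hook=before_begin("set statement_timeout = '60s'"),
        post_hook=after_commit("vacuum analytics.orders")
    ) }}

    {# inside a materialization #}
    {% if should_full_refresh() %}
        {# drop and rebuild instead of upserting #}
    {% endif %}

should_full_refresh() and should_store_failures() fold the model-level config together with the corresponding CLI flags (--full-refresh, --store-failures).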

View File

@@ -1,35 +0,0 @@
{% macro run_hooks(hooks, inside_transaction=True) %}
{% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}
{% if not inside_transaction and loop.first %}
{% call statement(auto_begin=inside_transaction) %}
commit;
{% endcall %}
{% endif %}
{% set rendered = render(hook.get('sql')) | trim %}
{% if (rendered | length) > 0 %}
{% call statement(auto_begin=inside_transaction) %}
{{ rendered }}
{% endcall %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro make_hook_config(sql, inside_transaction) %}
{{ tojson({"sql": sql, "transaction": inside_transaction}) }}
{% endmacro %}
{% macro before_begin(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro in_transaction(sql) %}
{{ make_hook_config(sql, inside_transaction=True) }}
{% endmacro %}
{% macro after_commit(sql) %}
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}

View File

@@ -0,0 +1,21 @@
{% macro incremental_upsert(tmp_relation, target_relation, unique_key=none, statement_name="main") %}
{%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
{%- if unique_key is not none -%}
delete
from {{ target_relation }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ tmp_relation }}
);
{%- endif %}
insert into {{ target_relation }} ({{ dest_cols_csv }})
(
select {{ dest_cols_csv }}
from {{ tmp_relation }}
);
{%- endmacro %}
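To ground this, with tmp_relation analytics.orders__dbt_tmp, target_relation analytics.orders, and unique_key 'order_id', the macro renders approximately the following (column names are placeholders standing in for get_columns_in_relation on the target):

    -- illustrative output only
    delete
    from analytics.orders
    where (order_id) in (
        select (order_id)
        from analytics.orders__dbt_tmp
    );

    insert into analytics.orders ("order_id", "status", "amount")
    (
        select "order_id", "status", "amount"
        from analytics.orders__dbt_tmp
    );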

View File

@@ -53,12 +53,8 @@
{% do adapter.expand_target_column_types(
from_relation=tmp_relation,
to_relation=target_relation) %}
{#-- Process schema changes. Returns dict of changes if successful. Use source columns for upserting/merging --#}
{% set dest_columns = process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% if not dest_columns %}
{% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}
{% endif %}
{% set build_sql = get_delete_insert_merge_sql(target_relation, tmp_relation, unique_key, dest_columns) %}
{% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}
{% endif %}

Some files were not shown because too many files have changed in this diff.