Compare commits


22 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Nathaniel May | 1e2bcef0ac | use concrete stdlib loggers | 2021-11-05 12:28:30 -04:00 |
| Nathaniel May | a546e79d06 | inline common processors | 2021-11-05 10:38:22 -04:00 |
| Nathaniel May | b8c9555347 | logging works now but too many times. | 2021-11-05 10:21:23 -04:00 |
| Nathaniel May | 2c8f1c9a78 | split out loggers into values | 2021-11-04 16:55:07 -04:00 |
| Nathaniel May | b1db4d7978 | move two log messages to event system | 2021-11-03 15:54:53 -04:00 |
| Nathaniel May | d1b9fbb7a3 | bug fix for logging | 2021-11-03 12:29:43 -04:00 |
| Nathaniel May | 29f34769df | remove print | 2021-11-03 11:57:45 -04:00 |
| Nathaniel May | 6ca5fa8f4a | first pass at json logging. fails. | 2021-11-03 11:57:04 -04:00 |
| Nathaniel May | 9febe38781 | add adapter logging interface, and change postgres adapter to use it. | 2021-11-02 13:53:07 -04:00 |
| Nathaniel May | a517375c6c | add comment | 2021-11-02 10:47:01 -04:00 |
| Nathaniel May | a9758297d5 | make logger global | 2021-11-02 10:46:33 -04:00 |
| Nathaniel May | c087d3b2dc | failed attempt at file logging | 2021-11-01 16:27:01 -04:00 |
| Nathaniel May | 55b33031fc | use structlog configs | 2021-11-01 13:22:02 -04:00 |
| Nathaniel May | 593b562611 | refactor for cleaner if else tree | 2021-11-01 12:21:28 -04:00 |
| Nathaniel May | 57d364212d | move datetime into event type | 2021-11-01 11:32:24 -04:00 |
| Nathaniel May | 415cc9c702 | add structlog to event module | 2021-11-01 10:19:42 -04:00 |
| Nathaniel May | d2f0e2d1e1 | Change Graph logger call sites (#4165): graph call sites for structured logging | 2021-10-29 17:08:30 -04:00 |
| Nathaniel May | e29db5897f | Client call sites (#4163): update log call sites with new event system | 2021-10-29 16:35:48 -04:00 |
| Nathaniel May | 87b8ca9615 | Handle exec info (#4168): handle exec info | 2021-10-29 16:01:04 -04:00 |
| Emily Rockman | a3dc5efda7 | context call sites (#4164): updated context dir to new structured logging | 2021-10-29 10:12:09 -05:00 |
| Nathaniel May | 1015b89dbf | Initial structured logging work with fire_event (#4137): add event type modeling and fire_event calls | 2021-10-29 09:16:06 -04:00 |
| Nathaniel May | 5c9fd07050 | init | 2021-10-26 13:57:30 -04:00 |
833 changed files with 4558 additions and 12037 deletions


@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.0.1
current_version = 1.0.0b2
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)
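
The `parse` pattern above is a multi-line regex, and its prerelease portion is truncated in this hunk. As a rough illustration only, here is a minimal Python sketch of how the visible major/minor/patch groups match a plain version string; it is not the full bumpversion pattern.

```python
# Minimal sketch of the visible portion of the bumpversion `parse` regex.
# The prerelease groups needed to match e.g. "1.0.0b2" are truncated in
# the hunk above, so this only handles plain major.minor.patch versions.
import re

parse = re.compile(r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)")

match = parse.match("1.0.1")
assert match is not None
print(match.groupdict())  # {'major': '1', 'minor': '0', 'patch': '1'}
```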

.github/CODEOWNERS (vendored, 43 lines changed)

@@ -1,43 +0,0 @@
# This file contains the code owners for the dbt-core repo.
# PRs will be automatically assigned for review to the associated
# team(s) or person(s) that touches any files that are mapped to them.
#
# A statement takes precedence over the statements above it so more general
# assignments are found at the top with specific assignments being lower in
# the ordering (i.e. catch all assignment should be the first item)
#
# Consult GitHub documentation for formatting guidelines:
# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#example-of-a-codeowners-file
# As a default for areas with no assignment,
# the core team as a whole will be assigned
* @dbt-labs/core
# Changes to GitHub configurations including Actions
/.github/ @leahwicz
# Language core modules
/core/dbt/config/ @dbt-labs/core-language
/core/dbt/context/ @dbt-labs/core-language
/core/dbt/contracts/ @dbt-labs/core-language
/core/dbt/deps/ @dbt-labs/core-language
/core/dbt/parser/ @dbt-labs/core-language
# Execution core modules
/core/dbt/events/ @dbt-labs/core-execution @dbt-labs/core-language # eventually remove language but they have knowledge here now
/core/dbt/graph/ @dbt-labs/core-execution
/core/dbt/task/ @dbt-labs/core-execution
# Adapter interface, scaffold, Postgres plugin
/core/dbt/adapters @dbt-labs/core-adapters
/core/scripts/create_adapter_plugin.py @dbt-labs/core-adapters
/plugins/ @dbt-labs/core-adapters
# Global project: default macros, including generic tests + materializations
/core/dbt/include/global_project @dbt-labs/core-execution @dbt-labs/core-adapters
# Perf regression testing framework
# This excludes the test project files itself since those aren't specific
# framework changes (excluded by not setting an owner next to it- no owner)
/performance @nathaniel-may
/performance/projects
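
As the comments above describe, later CODEOWNERS statements take precedence over earlier ones, which is why the catch-all `*` rule comes first. Here is a hedged Python sketch of that precedence rule, with deliberately simplified path matching for illustration only:

```python
# Sketch of CODEOWNERS precedence: the last matching pattern wins, so the
# catch-all "*" is listed first and more specific paths come later.
# Path matching here is simplified; real CODEOWNERS uses gitignore-style globs.
def owners_for(path, rules):
    matched = []
    for pattern, owners in rules:
        if pattern == "*" or path.startswith(pattern.strip("/")):
            matched = owners  # later rules take precedence
    return matched

rules = [
    ("*", ["@dbt-labs/core"]),
    ("/core/dbt/graph/", ["@dbt-labs/core-execution"]),
]
print(owners_for("core/dbt/graph/selector.py", rules))
# ['@dbt-labs/core-execution']
```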


@@ -1,6 +1,6 @@
module.exports = ({ context }) => {
const defaultPythonVersion = "3.8";
const supportedPythonVersions = ["3.7", "3.8", "3.9"];
const supportedPythonVersions = ["3.6", "3.7", "3.8", "3.9"];
const supportedAdapters = ["postgres"];
// if PR, generate matrix based on files changed and PR labels
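
The generator above is JavaScript; conceptually, the full matrix is the cross product of supported Python versions and adapters, which is then filtered down for PRs. A rough Python sketch of that idea follows (the PR filtering logic is omitted, and the version list mirrors one side of the diff):

```python
# Illustrative sketch of CI matrix generation: the full matrix is the
# cross product of Python versions and adapters. PR-based filtering by
# changed files and labels is omitted here.
from itertools import product

supported_python_versions = ["3.7", "3.8", "3.9"]
supported_adapters = ["postgres"]

matrix = [
    {"python-version": py, "adapter": adapter}
    for py, adapter in product(supported_python_versions, supported_adapters)
]
print(matrix)
```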


@@ -1,34 +0,0 @@
# **what?**
# When a PR is merged, if it has the backport label, it will create
# a new PR to backport those changes to the given branch. If it can't
# cleanly do a backport, it will comment on the merged PR of the failure.
#
# Label naming convention: "backport <branch name to backport to>"
# Example: backport 1.0.latest
#
# You MUST "Squash and merge" the original PR or this won't work.
# **why?**
# Changes sometimes need to be backported to release branches.
# This automates the backporting process
# **when?**
# Once a PR is "Squash and merge"'d and it has been correctly labeled
# according to the naming convention.
name: Backport
on:
pull_request:
types:
- closed
- labeled
jobs:
backport:
runs-on: ubuntu-18.04
name: Backport
steps:
- name: Backport
uses: tibdex/backport@v1.1.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}


@@ -1,26 +0,0 @@
# **what?**
# Mirrors issues into Jira. Includes the information: title,
# GitHub Issue ID and URL
# **why?**
# Jira is our tool for tracking and we need to see these issues in there
# **when?**
# On issue creation or when an issue is labeled `Jira`
name: Jira Issue Creation
on:
issues:
types: [opened, labeled]
permissions:
issues: write
jobs:
call-label-action:
uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main
secrets:
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}


@@ -1,27 +0,0 @@
# **what?**
# Calls mirroring Jira label Action. Includes adding a new label
# to an existing issue or removing a label as well
# **why?**
# Jira is our tool for tracking and we need to see these labels in there
# **when?**
# On labels being added or removed from issues
name: Jira Label Mirroring
on:
issues:
types: [labeled, unlabeled]
permissions:
issues: read
jobs:
call-label-action:
uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main
secrets:
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}


@@ -1,24 +0,0 @@
# **what?**
# Transition a Jira issue to a new state
# Only supports these GitHub Issue transitions:
# closed, deleted, reopened
# **why?**
# Jira needs to be kept up-to-date
# **when?**
# On issue closing, deletion, reopened
name: Jira Issue Transition
on:
issues:
types: [closed, deleted, reopened]
jobs:
call-label-action:
uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main
secrets:
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}


@@ -77,7 +77,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.6, 3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
env:
TOXENV: "unit"
@@ -167,7 +167,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.7, 3.8, 3.9]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- name: Set up Python ${{ matrix.python-version }}
@@ -198,9 +198,8 @@ jobs:
dbt --version
- name: Install source distributions
# ignore dbt-1.0.0, which intentionally raises an error when installed from source
run: |
find ./dist/dbt-[a-z]*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |
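
The pattern change above is subtle: `dbt-[a-z]*.gz` requires a lowercase letter right after `dbt-`, so it skips the bare `dbt-<version>` source distribution (which intentionally raises an error when installed from source) while still matching the per-package tarballs. A small Python demonstration of the two globs, using hypothetical filenames:

```python
# Why the find pattern changed: "dbt-[a-z]*.gz" skips the bare dbt source
# dist (a version digit follows "dbt-") but matches dbt-core/dbt-postgres.
# The filenames below are hypothetical examples.
import fnmatch

dists = ["dbt-1.0.0.tar.gz", "dbt-core-1.0.0.tar.gz", "dbt-postgres-1.0.0.tar.gz"]

print(fnmatch.filter(dists, "dbt-[a-z]*.gz"))
# ['dbt-core-1.0.0.tar.gz', 'dbt-postgres-1.0.0.tar.gz']
print(fnmatch.filter(dists, "*.gz"))
# all three, including 'dbt-1.0.0.tar.gz'
```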


@@ -1,200 +0,0 @@
# **what?**
# Take the given commit, run unit tests specifically on that sha, build and
# package it, and then release to GitHub and PyPi with that specific build
# **why?**
# Ensure an automated and tested release process
# **when?**
# This will only run manually with a given sha and version
name: Release to GitHub and PyPi
on:
workflow_dispatch:
inputs:
sha:
description: 'The last commit sha in the release'
required: true
version_number:
description: 'The release version number (i.e. 1.0.0b1)'
required: true
defaults:
run:
shell: bash
jobs:
unit:
name: Unit test
runs-on: ubuntu-latest
env:
TOXENV: "unit"
steps:
- name: Check out the repository
uses: actions/checkout@v2
with:
persist-credentials: false
ref: ${{ github.event.inputs.sha }}
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install tox
pip --version
tox --version
- name: Run tox
run: tox
build:
name: build packages
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v2
with:
persist-credentials: false
ref: ${{ github.event.inputs.sha }}
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install --upgrade setuptools wheel twine check-wheel-contents
pip --version
- name: Build distributions
run: ./scripts/build-dist.sh
- name: Show distributions
run: ls -lh dist/
- name: Check distribution descriptions
run: |
twine check dist/*
- name: Check wheel contents
run: |
check-wheel-contents dist/*.whl --ignore W007,W008
- uses: actions/upload-artifact@v2
with:
name: dist
path: |
dist/
!dist/dbt-${{github.event.inputs.version_number}}.tar.gz
test-build:
name: verify packages
needs: [build, unit]
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install python dependencies
run: |
pip install --user --upgrade pip
pip install --upgrade wheel
pip --version
- uses: actions/download-artifact@v2
with:
name: dist
path: dist/
- name: Show distributions
run: ls -lh dist/
- name: Install wheel distributions
run: |
find ./dist/*.whl -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check wheel distributions
run: |
dbt --version
- name: Install source distributions
run: |
find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
- name: Check source distributions
run: |
dbt --version
github-release:
name: GitHub Release
needs: test-build
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v2
with:
name: dist
path: '.'
# Need to set an output variable because env variables can't be taken as input
# This is needed for the next step with releasing to GitHub
- name: Find release type
id: release_type
env:
IS_PRERELEASE: ${{ contains(github.event.inputs.version_number, 'rc') || contains(github.event.inputs.version_number, 'b') }}
run: |
echo ::set-output name=isPrerelease::$IS_PRERELEASE
- name: Creating GitHub Release
uses: softprops/action-gh-release@v1
with:
name: dbt-core v${{github.event.inputs.version_number}}
tag_name: v${{github.event.inputs.version_number}}
prerelease: ${{ steps.release_type.outputs.isPrerelease }}
target_commitish: ${{github.event.inputs.sha}}
body: |
[Release notes](https://github.com/dbt-labs/dbt-core/blob/main/CHANGELOG.md)
files: |
dbt_postgres-${{github.event.inputs.version_number}}-py3-none-any.whl
dbt_core-${{github.event.inputs.version_number}}-py3-none-any.whl
dbt-postgres-${{github.event.inputs.version_number}}.tar.gz
dbt-core-${{github.event.inputs.version_number}}.tar.gz
pypi-release:
name: Pypi release
runs-on: ubuntu-latest
needs: github-release
environment: PypiProd
steps:
- uses: actions/download-artifact@v2
with:
name: dist
path: 'dist'
- name: Publish distribution to PyPI
uses: pypa/gh-action-pypi-publish@v1.4.2
with:
password: ${{ secrets.PYPI_API_TOKEN }}
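
The "Find release type" step above treats a version as a prerelease if it contains `rc` or `b`. A one-function Python equivalent of that check, using the same naive substring logic as the workflow expression:

```python
# Mirrors the workflow's prerelease check: contains(version, 'rc') ||
# contains(version, 'b'). Naive substring matching, exactly as above.
def is_prerelease(version_number: str) -> bool:
    return "rc" in version_number or "b" in version_number

assert is_prerelease("1.0.0b1")
assert is_prerelease("1.0.1rc1")
assert not is_prerelease("1.0.1")
```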


@@ -1,71 +0,0 @@
# This Action makes a dbt run to sample json structured logs
# and checks that they conform to the currently documented schema.
#
# If this action fails it either means we have unintentionally deviated
# from our documented structured logging schema, or we need to bump the
# version of our structured logging and add new documentation to
# communicate these changes.
name: Structured Logging Schema Check
on:
push:
branches:
- "main"
- "*.latest"
- "releases/*"
pull_request:
workflow_dispatch:
permissions: read-all
jobs:
# run the performance measurements on the current or default branch
test-schema:
name: Test Log Schema
runs-on: ubuntu-latest
env:
# turns warnings into errors
RUSTFLAGS: "-D warnings"
# points tests to the log file
LOG_DIR: "/home/runner/work/dbt-core/dbt-core/logs"
# tells integration tests to output into json format
DBT_LOG_FORMAT: 'json'
steps:
- name: checkout dev
uses: actions/checkout@v2
with:
persist-credentials: false
- name: Setup Python
uses: actions/setup-python@v2.2.2
with:
python-version: "3.8"
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: install dbt
run: pip install -r dev-requirements.txt -r editable-requirements.txt
- name: Set up postgres
uses: ./.github/actions/setup-postgres-linux
- name: ls
run: ls
# integration tests generate a ton of logs in different files. the next step will find them all.
# we actually care if these pass, because the normal test run doesn't usually include many json log outputs
- name: Run integration tests
run: tox -e py38-postgres -- -nauto
# apply our schema tests to every log event from the previous step
# skips any output that isn't valid json
- uses: actions-rs/cargo@v1
with:
command: run
args: --manifest-path test/interop/log_parsing/Cargo.toml
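
The actual schema check is the Rust crate invoked via `cargo run` above. For intuition, here is a hedged Python sketch of the same idea: parse each log line as JSON, skip anything that isn't valid JSON, and assert some expected top-level keys. The key names used here (`code`, `ts`) are assumptions for illustration, not the documented schema.

```python
# Illustrative stand-in for the Rust log-schema check: validate every
# JSON log line, skipping non-JSON output just as the workflow does.
# The required keys ("code", "ts") are assumed for this sketch.
import json
import sys

def check_log_file(path):
    with open(path) as f:
        for line in f:
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip any output that isn't valid JSON
            for key in ("code", "ts"):
                assert key in event, f"missing {key!r} in: {line.strip()}"

if __name__ == "__main__":
    check_log_file(sys.argv[1])
```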


@@ -66,12 +66,12 @@ jobs:
git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
# - name: Generate Docker requirements
# run: |
# source env/bin/activate
# pip install -r requirements.txt
# pip freeze -l > docker/requirements/requirements.txt
# git status
- name: Generate Docker requirements
run: |
source env/bin/activate
pip install -r requirements.txt
pip freeze -l > docker/requirements/requirements.txt
git status
- name: Bump version
run: |


@@ -4,17 +4,16 @@ The core function of dbt is SQL compilation and execution. Users create projects
Most of the python code in the repository is within the `core/dbt` directory. Currently the main subdirectories are:
- [`adapters`](core/dbt/adapters/README.md): Define base classes for behavior that is likely to differ across databases
- [`clients`](core/dbt/clients/README.md): Interface with dependencies (agate, jinja) or across operating systems
- [`config`](core/dbt/config/README.md): Reconcile user-supplied configuration from connection profiles, project files, and Jinja macros
- [`context`](core/dbt/context/README.md): Build and expose dbt-specific Jinja functionality
- [`contracts`](core/dbt/contracts/README.md): Define Python objects (dataclasses) that dbt expects to create and validate
- [`deps`](core/dbt/deps/README.md): Package installation and dependency resolution
- [`events`](core/dbt/events/README.md): Logging events
- [`graph`](core/dbt/graph/README.md): Produce a `networkx` DAG of project resources, and select those resources given user-supplied criteria
- [`include`](core/dbt/include/README.md): The dbt "global project," which defines default implementations of Jinja2 macros
- [`parser`](core/dbt/parser/README.md): Read project files, validate, construct python objects
- [`task`](core/dbt/task/README.md): Set forth the actions that dbt can perform when invoked
- [`adapters`](core/dbt/adapters): Define base classes for behavior that is likely to differ across databases
- [`clients`](core/dbt/clients): Interface with dependencies (agate, jinja) or across operating systems
- [`config`](core/dbt/config): Reconcile user-supplied configuration from connection profiles, project files, and Jinja macros
- [`context`](core/dbt/context): Build and expose dbt-specific Jinja functionality
- [`contracts`](core/dbt/contracts): Define Python objects (dataclasses) that dbt expects to create and validate
- [`deps`](core/dbt/deps): Package installation and dependency resolution
- [`graph`](core/dbt/graph): Produce a `networkx` DAG of project resources, and select those resources given user-supplied criteria
- [`include`](core/dbt/include): The dbt "global project," which defines default implementations of Jinja2 macros
- [`parser`](core/dbt/parser): Read project files, validate, construct python objects
- [`task`](core/dbt/task): Set forth the actions that dbt can perform when invoked
### Invoking dbt
@@ -45,4 +44,4 @@ The [`test/`](test/) subdirectory includes unit and integration tests that run a
- [docker](docker/): All dbt versions are published as Docker images on DockerHub. This subfolder contains the `Dockerfile` (constant) and `requirements.txt` (one for each version).
- [etc](etc/): Images for README
- [scripts](scripts/): Helper scripts for testing, releasing, and producing JSON schemas. These are not included in distributions of dbt, nor are they rigorously tested—they're just handy tools for the dbt maintainers :)
- [scripts](scripts/): Helper scripts for testing, releasing, and producing JSON schemas. These are not included in distributions of dbt, not are they rigorously tested—they're just handy tools for the dbt maintainers :)
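
For a feel of what the `graph` module described above produces, here is a conceptual `networkx` sketch: resources become nodes in a DAG, and selection (e.g. `my_model+`) is a graph traversal. The node names are invented for illustration.

```python
# Conceptual sketch of core/dbt/graph: a networkx DAG of project
# resources, with "node+" selection as a descendants traversal.
# Node names are made up for illustration.
import networkx as nx

dag = nx.DiGraph()
dag.add_edge("model.proj.stg_orders", "model.proj.orders")
dag.add_edge("model.proj.orders", "test.proj.unique_orders_id")

assert nx.is_directed_acyclic_graph(dag)
# "orders+" style selection: the node plus everything downstream
selected = {"model.proj.orders"} | nx.descendants(dag, "model.proj.orders")
print(sorted(selected))
```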


@@ -1,174 +1,4 @@
## dbt-core 1.1.0 (TBD)
### Features
- New Dockerfile to support specific db adapters and platforms. See docker/README.md for details ([#4495](https://github.com/dbt-labs/dbt-core/issues/4495), [#4487](https://github.com/dbt-labs/dbt-core/pull/4487))
### Fixes
- User wasn't asked for permission to overwrite a profile entry when running init inside an existing project ([#4375](https://github.com/dbt-labs/dbt-core/issues/4375), [#4447](https://github.com/dbt-labs/dbt-core/pull/4447))
- Add project name validation to `dbt init` ([#4490](https://github.com/dbt-labs/dbt-core/issues/4490),[#4536](https://github.com/dbt-labs/dbt-core/pull/4536))
### Under the hood
- Testing cleanup ([#4496](https://github.com/dbt-labs/dbt-core/pull/4496), [#4509](https://github.com/dbt-labs/dbt-core/pull/4509))
- Clean up test deprecation warnings ([#3988](https://github.com/dbt-labs/dbt-core/issues/3988), [#4556](https://github.com/dbt-labs/dbt-core/pull/4556))
- Use mashumaro for serialization in event logging ([#4504](https://github.com/dbt-labs/dbt-core/issues/4504), [#4505](https://github.com/dbt-labs/dbt-core/pull/4505))
- Drop support for Python 3.7.0 + 3.7.1 ([#4584](https://github.com/dbt-labs/dbt-core/issues/4584), [#4585](https://github.com/dbt-labs/dbt-core/pull/4585), [#4643](https://github.com/dbt-labs/dbt-core/pull/4643))
Contributors:
- [@NiallRees](https://github.com/NiallRees) ([#4447](https://github.com/dbt-labs/dbt-core/pull/4447))
## dbt-core 1.0.2 (TBD)
### Fixes
- Projects created using `dbt init` now have the correct `seeds` directory created (instead of `data`) ([#4588](https://github.com/dbt-labs/dbt-core/issues/4588), [#4599](https://github.com/dbt-labs/dbt-core/pull/4589))
- Don't require a profile for dbt deps and clean commands ([#4554](https://github.com/dbt-labs/dbt-core/issues/4554), [#4610](https://github.com/dbt-labs/dbt-core/pull/4610))
- Select modified.body works correctly when a new model is added ([#4570](https://github.com/dbt-labs/dbt-core/issues/4570), [#4631](https://github.com/dbt-labs/dbt-core/pull/4631))
- Fix bug in retry logic for bad response from hub and when there is a bad git tarball download. ([#4577](https://github.com/dbt-labs/dbt-core/issues/4577), [#4579](https://github.com/dbt-labs/dbt-core/issues/4579), [#4609](https://github.com/dbt-labs/dbt-core/pull/4609))
- Restore previous log level (DEBUG) when a test depends on a disabled resource. Still WARN if the resource is missing ([#4594](https://github.com/dbt-labs/dbt-core/issues/4594), [#4647](https://github.com/dbt-labs/dbt-core/pull/4647))
## dbt-core 1.0.1 (January 03, 2022)
* [@amirkdv](https://github.com/amirkdv) ([#4536](https://github.com/dbt-labs/dbt-core/pull/4536))
## dbt-core 1.0.1rc1 (December 20, 2021)
### Fixes
- Fix wrong url in the dbt docs overview homepage ([#4442](https://github.com/dbt-labs/dbt-core/pull/4442))
- Fix redefined status param of SQLQueryStatus to typecheck the string which passes on `._message` value of `AdapterResponse` or the `str` value sent by adapter plugin. ([#4463](https://github.com/dbt-labs/dbt-core/pull/4463#issuecomment-990174166))
- Fix `DepsStartPackageInstall` event to use package name instead of version number. ([#4482](https://github.com/dbt-labs/dbt-core/pull/4482))
- Reimplement log message to use adapter name instead of the object method. ([#4501](https://github.com/dbt-labs/dbt-core/pull/4501))
- Issue better error message for incompatible schemas ([#4470](https://github.com/dbt-labs/dbt-core/pull/4442), [#4497](https://github.com/dbt-labs/dbt-core/pull/4497))
- Remove secrets from error related to packages. ([#4507](https://github.com/dbt-labs/dbt-core/pull/4507))
- Prevent coercion of boolean values (`True`, `False`) to numeric values (`0`, `1`) in query results ([#4511](https://github.com/dbt-labs/dbt-core/issues/4511), [#4512](https://github.com/dbt-labs/dbt-core/pull/4512))
- Fix error with an env_var in a project hook ([#4523](https://github.com/dbt-labs/dbt-core/issues/4523), [#4524](https://github.com/dbt-labs/dbt-core/pull/4524))
- Add additional windows compat logic for colored log output. ([#4443](https://github.com/dbt-labs/dbt-core/issues/4443))
### Docs
- Fix missing data on exposures in docs ([#4467](https://github.com/dbt-labs/dbt-core/issues/4467))
Contributors:
- [remoyson](https://github.com/remoyson) ([#4442](https://github.com/dbt-labs/dbt-core/pull/4442))
## dbt-core 1.0.0 (December 3, 2021)
### Fixes
- Configure the CLI logger destination to use stdout instead of stderr ([#4368](https://github.com/dbt-labs/dbt-core/pull/4368))
- Make the size of `EVENT_HISTORY` configurable, via `EVENT_BUFFER_SIZE` global config ([#4411](https://github.com/dbt-labs/dbt-core/pull/4411), [#4416](https://github.com/dbt-labs/dbt-core/pull/4416))
- Change type of `log_format` in `profiles.yml` user config to be string, not boolean ([#4394](https://github.com/dbt-labs/dbt-core/pull/4394))
### Under the hood
- Only log cache events if `LOG_CACHE_EVENTS` is enabled, and disable by default. This restores previous behavior ([#4369](https://github.com/dbt-labs/dbt-core/pull/4369))
- Move event codes to be a top-level attribute of JSON-formatted logs, rather than nested in `data` ([#4381](https://github.com/dbt-labs/dbt-core/pull/4381))
- Fix failing integration test on Windows ([#4380](https://github.com/dbt-labs/dbt-core/pull/4380))
- Clean up warning messages for `clean` + `deps` ([#4366](https://github.com/dbt-labs/dbt-core/pull/4366))
- Use RFC3339 timestamps for log messages ([#4384](https://github.com/dbt-labs/dbt-core/pull/4384))
- Different text output for console (info) and file (debug) logs ([#4379](https://github.com/dbt-labs/dbt-core/pull/4379), [#4418](https://github.com/dbt-labs/dbt-core/pull/4418))
- Remove unused events. More structured `ConcurrencyLine`. Replace `\n` message starts/ends with `EmptyLine` events, and exclude `EmptyLine` from JSON-formatted output ([#4388](https://github.com/dbt-labs/dbt-core/pull/4388))
- Update `events` module README ([#4395](https://github.com/dbt-labs/dbt-core/pull/4395))
- Rework approach to JSON serialization for events with non-standard properties ([#4396](https://github.com/dbt-labs/dbt-core/pull/4396))
- Update legacy logger file name to `dbt.log.legacy` ([#4402](https://github.com/dbt-labs/dbt-core/pull/4402))
- Rollover `dbt.log` at 10 MB, and keep up to 5 backups, restoring previous behavior ([#4405](https://github.com/dbt-labs/dbt-core/pull/4405))
- Use reference keys instead of full relation objects in cache events ([#4410](https://github.com/dbt-labs/dbt-core/pull/4410))
- Add `node_type` contextual info to more events ([#4378](https://github.com/dbt-labs/dbt-core/pull/4378))
- Make `materialized` config optional in `node_type` ([#4417](https://github.com/dbt-labs/dbt-core/pull/4417))
- Stringify exception in `GenericExceptionOnRun` to support JSON serialization ([#4424](https://github.com/dbt-labs/dbt-core/pull/4424))
- Add "interop" tests for machine consumption of structured log output ([#4327](https://github.com/dbt-labs/dbt-core/pull/4327))
- Relax version specifier for `dbt-extractor` to `~=0.4.0`, to support compiled wheels for additional architectures when available ([#4427](https://github.com/dbt-labs/dbt-core/pull/4427))
## dbt-core 1.0.0rc3 (November 30, 2021)
### Fixes
- Support partial parsing of env_vars in metrics ([#4293](https://github.com/dbt-labs/dbt-core/issues/4293), [#4322](https://github.com/dbt-labs/dbt-core/pull/4322))
- Fix typo in `UnparsedSourceDefinition.__post_serialize__` ([#3545](https://github.com/dbt-labs/dbt-core/issues/3545), [#4349](https://github.com/dbt-labs/dbt-core/pull/4349))
### Under the hood
- Change some CompilationExceptions to ParsingExceptions ([#4254](https://github.com/dbt-labs/dbt-core/issues/4254), [#4328](https://github.com/dbt-labs/dbt-core/pull/4328))
- Reorder logic for static parser sampling to speed up model parsing ([#4332](https://github.com/dbt-labs/dbt-core/pull/4332))
- Use more augmented assignment statements ([#4315](https://github.com/dbt-labs/dbt-core/issues/4315), [#4331](https://github.com/dbt-labs/dbt-core/pull/4331))
- Adjust logic when finding approximate matches for models and tests ([#3835](https://github.com/dbt-labs/dbt-core/issues/3835), [#4076](https://github.com/dbt-labs/dbt-core/pull/4076))
- Restore small previous behaviors for logging: JSON formatting for first few events; `WARN`-level stdout for `list` task; include tracking events in `dbt.log` ([#4341](https://github.com/dbt-labs/dbt-core/pull/4341))
Contributors:
- [@sarah-weatherbee](https://github.com/sarah-weatherbee) ([#4331](https://github.com/dbt-labs/dbt-core/pull/4331))
- [@emilieschario](https://github.com/emilieschario) ([#4076](https://github.com/dbt-labs/dbt-core/pull/4076))
- [@sneznaj](https://github.com/sneznaj) ([#4349](https://github.com/dbt-labs/dbt-core/pull/4349))
## dbt-core 1.0.0rc2 (November 22, 2021)
### Breaking changes
- Restrict secret env vars (prefixed `DBT_ENV_SECRET_`) to `profiles.yml` + `packages.yml` _only_. Raise an exception if a secret env var is used elsewhere ([#4310](https://github.com/dbt-labs/dbt-core/issues/4310), [#4311](https://github.com/dbt-labs/dbt-core/pull/4311))
- Reorder arguments to `config.get()` so that `default` is second ([#4273](https://github.com/dbt-labs/dbt-core/issues/4273), [#4297](https://github.com/dbt-labs/dbt-core/pull/4297))
### Features
- Avoid error when missing column in YAML description ([#4151](https://github.com/dbt-labs/dbt-core/issues/4151), [#4285](https://github.com/dbt-labs/dbt-core/pull/4285))
- Allow `--defer` flag to `dbt snapshot` ([#4110](https://github.com/dbt-labs/dbt-core/issues/4110), [#4296](https://github.com/dbt-labs/dbt-core/pull/4296))
- Install prerelease packages when `version` explicitly references a prerelease version, regardless of `install-prerelease` status ([#4243](https://github.com/dbt-labs/dbt-core/issues/4243), [#4295](https://github.com/dbt-labs/dbt-core/pull/4295))
- Add data attributes to json log messages ([#4301](https://github.com/dbt-labs/dbt-core/pull/4301))
- Add event codes to all log events ([#4319](https://github.com/dbt-labs/dbt-core/pull/4319))
### Fixes
- Fix serialization error with missing quotes in metrics model ref ([#4252](https://github.com/dbt-labs/dbt-core/issues/4252), [#4289](https://github.com/dbt-labs/dbt-core/pull/4289))
- Correct definition of 'created_at' in ParsedMetric nodes ([#4298](https://github.com/dbt-labs/dbt-core/issues/4298), [#4299](https://github.com/dbt-labs/dbt-core/pull/4299))
### Fixes
- Allow specifying default in Jinja config.get with default keyword ([#4273](https://github.com/dbt-labs/dbt-core/issues/4273), [#4297](https://github.com/dbt-labs/dbt-core/pull/4297))
- Fix serialization error with missing quotes in metrics model ref ([#4252](https://github.com/dbt-labs/dbt-core/issues/4252), [#4289](https://github.com/dbt-labs/dbt-core/pull/4289))
- Correct definition of 'created_at' in ParsedMetric nodes ([#4298](https://github.com/dbt-labs/dbt-core/issues/4298), [#4299](https://github.com/dbt-labs/dbt-core/pull/4299))
### Under the hood
- Add --indirect-selection parameter to profiles.yml and builtin DBT_ env vars; stringified parameter to enable multi-modal use ([#3997](https://github.com/dbt-labs/dbt-core/issues/3997), [#4270](https://github.com/dbt-labs/dbt-core/pull/4270))
- Fix filesystem searcher test failure on Python 3.9 ([#3689](https://github.com/dbt-labs/dbt-core/issues/3689), [#4271](https://github.com/dbt-labs/dbt-core/pull/4271))
- Clean up deprecation warnings shown for `dbt_project.yml` config renames ([#4276](https://github.com/dbt-labs/dbt-core/issues/4276), [#4291](https://github.com/dbt-labs/dbt-core/pull/4291))
- Fix metrics count in compiled project stats ([#4290](https://github.com/dbt-labs/dbt-core/issues/4290), [#4292](https://github.com/dbt-labs/dbt-core/pull/4292))
- First pass at supporting more dbt tasks via python lib ([#4200](https://github.com/dbt-labs/dbt-core/pull/4200))
Contributors:
- [@kadero](https://github.com/kadero) ([#4285](https://github.com/dbt-labs/dbt-core/pull/4285), [#4296](https://github.com/dbt-labs/dbt-core/pull/4296))
- [@joellabes](https://github.com/joellabes) ([#4295](https://github.com/dbt-labs/dbt-core/pull/4295))
## dbt-core 1.0.0rc1 (November 10, 2021)
### Breaking changes
- Replace `greedy` flag/property for test selection with `indirect_selection: eager/cautious` flag/property. Set to `eager` by default. **Note:** This reverts test selection to its pre-v0.20 behavior by default. `dbt test -s my_model` _will_ select multi-parent tests, such as `relationships`, that depend on unselected resources. To achieve the behavior change in v0.20 + v0.21, set `--indirect-selection=cautious` on the CLI or `indirect_selection: cautious` in yaml selectors. ([#4082](https://github.com/dbt-labs/dbt-core/issues/4082), [#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- In v1.0.0, **`pip install dbt` will raise an explicit error.** Instead, please use `pip install dbt-<adapter>` (to use dbt with that database adapter), or `pip install dbt-core` (for core functionality). For parity with the previous behavior of `pip install dbt`, you can use: `pip install dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery` ([#4100](https://github.com/dbt-labs/dbt-core/issues/4100), [#4133](https://github.com/dbt-labs/dbt-core/pull/4133))
- Reorganize the `global_project` (macros) into smaller files with clearer names. Remove unused global macros: `column_list`, `column_list_for_create_table`, `incremental_upsert` ([#4154](https://github.com/dbt-labs/dbt-core/pull/4154))
- Introduce structured event interface, and begin conversion of all legacy logging ([#3359](https://github.com/dbt-labs/dbt-core/issues/3359), [#4055](https://github.com/dbt-labs/dbt-core/pull/4055))
- **This is a breaking change for adapter plugins, requiring a very simple migration.** See [`events` module README](core/dbt/events/README.md#adapter-maintainers) for details.
- If you maintain another kind of dbt-core plugin that makes heavy use of legacy logging, and you need time to cut over to the new event interface, you can re-enable the legacy logger via an environment variable shim, `DBT_ENABLE_LEGACY_LOGGER=True`. Be advised that we will remove this capability in a future version of dbt-core.
### Features
- Allow nullable `error_after` in source freshness ([#3874](https://github.com/dbt-labs/dbt-core/issues/3874), [#3955](https://github.com/dbt-labs/dbt-core/pull/3955))
- Add `metrics` nodes ([#4071](https://github.com/dbt-labs/dbt-core/issues/4071), [#4235](https://github.com/dbt-labs/dbt-core/pull/4235))
- Add support for `dbt init <project_name>`, and support for `skip_profile_setup` argument (`dbt init -s`) ([#4156](https://github.com/dbt-labs/dbt-core/issues/4156), [#4249](https://github.com/dbt-labs/dbt-core/pull/4249))
### Fixes
- Changes unit tests using `assertRaisesRegexp` to `assertRaisesRegex` ([#4132](https://github.com/dbt-labs/dbt-core/issues/4132), [#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- Allow retries when the answer from a `dbt deps` is `None` ([#4178](https://github.com/dbt-labs/dbt-core/issues/4178), [#4225](https://github.com/dbt-labs/dbt-core/pull/4225))
### Docs
- Fix non-alphabetical sort of Source Tables in source overview page ([docs#81](https://github.com/dbt-labs/dbt-docs/issues/81), [docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- Add title tag to node elements in tree ([docs#202](https://github.com/dbt-labs/dbt-docs/issues/202), [docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
- Account for test rename: `schema` &rarr; `generic`, `data` &rarr; `singular`. Use `test_metadata` instead of `schema`/`data` tags to differentiate ([docs#216](https://github.com/dbt-labs/dbt-docs/issues/216), [docs#222](https://github.com/dbt-labs/dbt-docs/pull/222))
- Add `metrics` ([core#4235](https://github.com/dbt-labs/dbt-core/issues/4235), [docs#223](https://github.com/dbt-labs/dbt-docs/pull/223))
### Under the hood
- Bump artifact schema versions for 1.0.0: manifest v4, run results v4, sources v3. Notable changes: added `metrics` nodes; schema test + data test nodes are renamed to generic test + singular test nodes; freshness threshold default values ([#4191](https://github.com/dbt-labs/dbt-core/pull/4191))
- Speed up node selection by skipping `incorporate_indirect_nodes` if not needed ([#4213](https://github.com/dbt-labs/dbt-core/issues/4213), [#4214](https://github.com/dbt-labs/dbt-core/issues/4214))
- When `on_schema_change` is set, pass common columns as `dest_columns` in incremental merge macros ([#4144](https://github.com/dbt-labs/dbt-core/issues/4144), [#4170](https://github.com/dbt-labs/dbt-core/pull/4170))
- Clear adapters before registering in `lib` module config generation ([#4218](https://github.com/dbt-labs/dbt-core/pull/4218))
- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#4223](https://github.com/dbt-labs/dbt-core/pull/4223))
Contributors:
- [@kadero](https://github.com/kadero) ([#3955](https://github.com/dbt-labs/dbt-core/pull/3955), [#4249](https://github.com/dbt-labs/dbt-core/pull/4249))
- [@frankcash](https://github.com/frankcash) ([#4136](https://github.com/dbt-labs/dbt-core/pull/4136))
- [@Kayrnt](https://github.com/Kayrnt) ([#4170](https://github.com/dbt-labs/dbt-core/pull/4170))
- [@VersusFacit](https://github.com/VersusFacit) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@joellabes](https://github.com/joellabes) ([#4104](https://github.com/dbt-labs/dbt-core/pull/4104))
- [@b-per](https://github.com/b-per) ([#4225](https://github.com/dbt-labs/dbt-core/pull/4225))
- [@salmonsd](https://github.com/salmonsd) ([docs#218](https://github.com/dbt-labs/dbt-docs/pull/218))
- [@miike](https://github.com/miike) ([docs#203](https://github.com/dbt-labs/dbt-docs/pull/203))
## dbt-core 1.0.0 (Release TBD)
## dbt-core 1.0.0b2 (October 25, 2021)
@@ -185,16 +15,12 @@ Contributors:
- `dbt init` is now interactive, generating profiles.yml when run inside existing project ([#3625](https://github.com/dbt-labs/dbt/pull/3625))
### Under the hood
- Fix intermittent errors in partial parsing tests ([#4060](https://github.com/dbt-labs/dbt-core/issues/4060), [#4068](https://github.com/dbt-labs/dbt-core/pull/4068))
- Make finding disabled nodes more consistent ([#4069](https://github.com/dbt-labs/dbt-core/issues/4069), [#4073](https://github.com/dbt-labs/dbt-core/pull/4073))
- Remove connection from `render_with_context` during parsing, thereby removing misleading log message ([#3137](https://github.com/dbt-labs/dbt-core/issues/3137), [#4062](https://github.com/dbt-labs/dbt-core/pull/4062))
- Wait for postgres docker container to be ready in `setup_db.sh`. ([#3876](https://github.com/dbt-labs/dbt-core/issues/3876), [#3908](https://github.com/dbt-labs/dbt-core/pull/3908))
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Prefer macros defined in the project over the ones in a package by default ([#4106](https://github.com/dbt-labs/dbt-core/issues/4106), [#4114](https://github.com/dbt-labs/dbt-core/pull/4114))
- Dependency updates ([#4079](https://github.com/dbt-labs/dbt-core/pull/4079), [#3532](https://github.com/dbt-labs/dbt-core/pull/3532))
- Schedule partial parsing for SQL files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4101](https://github.com/dbt-labs/dbt-core/pull/4101))
- Schedule partial parsing for schema files with env_var changes ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4162](https://github.com/dbt-labs/dbt-core/pull/4162))
- Skip partial parsing when env_vars change in dbt_project or profile ([#3885](https://github.com/dbt-labs/dbt-core/issues/3885), [#4212](https://github.com/dbt-labs/dbt-core/pull/4212))
Contributors:
- [@sungchun12](https://github.com/sungchun12) ([#4017](https://github.com/dbt-labs/dbt/pull/4017))
@@ -228,6 +54,7 @@ Contributors:
- Fixed bug with `error_if` test option ([#4070](https://github.com/dbt-labs/dbt-core/pull/4070))
### Under the hood
- Enact deprecation for `materialization-return` and replace deprecation warning with an exception. ([#3896](https://github.com/dbt-labs/dbt-core/issues/3896))
- Build catalog for only relational, non-ephemeral nodes in the graph ([#3920](https://github.com/dbt-labs/dbt-core/issues/3920))
- Enact deprecation to remove the `release` arg from the `execute_macro` method. ([#3900](https://github.com/dbt-labs/dbt-core/issues/3900))
@@ -240,7 +67,6 @@ Contributors:
- Update the default project paths to be `analysis-paths = ['analyses']` and `test-paths = ['tests']`. Also have starter project set `analysis-paths: ['analyses']` from now on. ([#2659](https://github.com/dbt-labs/dbt-core/issues/2659))
- Define the data type of `sources` as an array of arrays of string in the manifest artifacts. ([#3966](https://github.com/dbt-labs/dbt-core/issues/3966), [#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- Marked `source-paths` and `data-paths` as deprecated keys in `dbt_project.yml` in favor of `model-paths` and `seed-paths` respectively. ([#1607](https://github.com/dbt-labs/dbt-core/issues/1607))
- Surface git errors to `stdout` when cloning dbt packages from Github. ([#3167](https://github.com/dbt-labs/dbt-core/issues/3167))
Contributors:
@@ -249,25 +75,14 @@ Contributors:
- [@samlader](https://github.com/samlader) ([#3993](https://github.com/dbt-labs/dbt-core/pull/3993))
- [@yu-iskw](https://github.com/yu-iskw) ([#3967](https://github.com/dbt-labs/dbt-core/pull/3967))
- [@laxjesse](https://github.com/laxjesse) ([#4019](https://github.com/dbt-labs/dbt-core/pull/4019))
- [@gitznik](https://github.com/Gitznik) ([#4124](https://github.com/dbt-labs/dbt-core/pull/4124))
## dbt 0.21.1 (November 29, 2021)
### Fixes
- Add `get_where_subquery` to test macro namespace, fixing custom generic tests that rely on introspecting the `model` arg at parse time ([#4195](https://github.com/dbt-labs/dbt/issues/4195), [#4197](https://github.com/dbt-labs/dbt/pull/4197))
## dbt 0.21.1rc1 (November 03, 2021)
## dbt 0.21.1 (Release TBD)
### Fixes
- Performance: Use child_map to find tests for nodes in resolve_graph ([#4012](https://github.com/dbt-labs/dbt/issues/4012), [#4022](https://github.com/dbt-labs/dbt/pull/4022))
- Switch `unique_field` from abstractproperty to optional property. Add docstring ([#4025](https://github.com/dbt-labs/dbt/issues/4025), [#4028](https://github.com/dbt-labs/dbt/pull/4028))
- Include only relational nodes in `database_schema_set` ([#4063](https://github.com/dbt-labs/dbt-core/issues/4063), [#4077](https://github.com/dbt-labs/dbt-core/pull/4077))
- Added support for tests on databases that lack real boolean types. ([#4084](https://github.com/dbt-labs/dbt-core/issues/4084))
- Scrub secrets coming from `CommandError`s so they don't get exposed in logs. ([#4138](https://github.com/dbt-labs/dbt-core/pull/4138))
- Syntax fix in `alter_relation_add_remove_columns` if only removing columns in `on_schema_change: sync_all_columns` ([#4147](https://github.com/dbt-labs/dbt-core/issues/4147))
- Increase performance of graph subset selection ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135),[#4155](https://github.com/dbt-labs/dbt-core/pull/4155))
- Add downstream test edges for `build` task _only_. Restore previous graph construction, compilation performance, and node selection behavior (`test+`) for all other tasks ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135), [#4143](https://github.com/dbt-labs/dbt-core/pull/4143))
- Don't require a strict/proper subset when adding testing edges to specialized graph for `build` ([#4135](https://github.com/dbt-labs/dbt-core/issues/4135), [#4160](https://github.com/dbt-labs/dbt-core/pull/4160))
Contributors:
- [@ljhopkins2](https://github.com/ljhopkins2) ([#4077](https://github.com/dbt-labs/dbt-core/pull/4077))
@@ -395,7 +210,7 @@ Contributors:
- [@jmriego](https://github.com/jmriego) ([#3526](https://github.com/dbt-labs/dbt-core/pull/3526))
- [@danielefrigo](https://github.com/danielefrigo) ([#3547](https://github.com/dbt-labs/dbt-core/pull/3547))
## dbt 0.20.2 (September 07, 2021)
## dbt 0.20.2 (Release TBD)
### Under the hood


@@ -10,7 +10,7 @@
## About this document
This document is a guide intended for folks interested in contributing to `dbt-core`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt-core`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
This document is a guide intended for folks interested in contributing to `dbt`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, python modules, filesystems, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
If you're new to python development or contributing to open-source software, we encourage you to read this document from start to finish. If you get stuck, drop us a line in the `#dbt-core-development` channel on [slack](https://community.getdbt.com).
@@ -20,101 +20,101 @@ If you have an issue or code change suggestion related to a specific database [a
### Signing the CLA
Please note that all contributors to `dbt-core` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt-core` codebase. If you are unable to sign the CLA, then the `dbt-core` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
Please note that all contributors to `dbt` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt` codebase. If you are unable to sign the CLA, then the `dbt` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
## Proposing a change
`dbt-core` is Apache 2.0-licensed open source software. `dbt-core` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
`dbt` is Apache 2.0-licensed open source software. `dbt` is what it is today because community members like you have opened issues, provided feedback, and contributed to the knowledge loop for the entire community. Whether you are a seasoned open source contributor or a first-time committer, we welcome and encourage you to contribute code, documentation, ideas, or problem statements to this project.
### Defining the problem
If you have an idea for a new feature or if you've discovered a bug in `dbt-core`, the first step is to open an issue. Please check the list of [open issues](https://github.com/dbt-labs/dbt-core/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt-core` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
If you have an idea for a new feature or if you've discovered a bug in `dbt`, the first step is to open an issue. Please check the list of [open issues](https://github.com/dbt-labs/dbt-core/issues) before creating a new one. If you find a relevant issue, please add a comment to the open issue instead of creating a new one. There are hundreds of open issues in this repository and it can be hard to know where to look for a relevant open issue. **The `dbt` maintainers are always happy to point contributors in the right direction**, so please err on the side of documenting your idea in a new issue if you are unsure where a problem statement belongs.
> **Note:** All community-contributed Pull Requests _must_ be associated with an open issue. If you submit a Pull Request that does not pertain to an open issue, you will be asked to create an issue describing the problem before the Pull Request can be reviewed.
### Discussing the idea
After you open an issue, a `dbt-core` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt-core` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
After you open an issue, a `dbt` maintainer will follow up by commenting on your issue (usually within 1-3 days) to explore your idea further and advise on how to implement the suggested changes. In many cases, community members will chime in with their own thoughts on the problem statement. If you as the issue creator are interested in submitting a Pull Request to address the issue, you should indicate this in the body of the issue. The `dbt` maintainers are _always_ happy to help contributors with the implementation of fixes and features, so please also indicate if there's anything you're unsure about or could use guidance around in the issue.
### Submitting a change
If an issue is appropriately well scoped and describes a beneficial change to the `dbt-core` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.
If an issue is appropriately well scoped and describes a beneficial change to the `dbt` codebase, then anyone may submit a Pull Request to implement the functionality described in the issue. See the sections below on how to do this.
The `dbt-core` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/dbt-labs/dbt-core/contribute) page.
The `dbt` maintainers will add a `good first issue` label if an issue is suitable for a first-time contributor. This label often means that the required code change is small, limited to one database adapter, or a net-new addition that does not impact existing functionality. You can see the list of currently open issues on the [Contribute](https://github.com/dbt-labs/dbt-core/contribute) page.
Here's a good workflow:
- Comment on the open issue, expressing your interest in contributing the required code change
- Outline your planned implementation. If you want help getting started, ask!
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt-core` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended, and that it avoids any bugs previously encountered. Currently, the best resource for understanding `dbt-core`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.
- Follow the steps outlined below to develop locally. Once you have opened a PR, one of the `dbt` maintainers will work with you to review your code.
- Add a test! Tests are crucial for both fixes and new features alike. We want to make sure that code works as intended, and that it avoids any bugs previously encountered. Currently, the best resource for understanding `dbt`'s [unit](test/unit) and [integration](test/integration) tests is the tests themselves. One of the maintainers can help by pointing out relevant examples.
In some cases, the right resolution to an open issue might be tangential to the `dbt-core` codebase. The right path forward might be a documentation update or a change that can be made in user-space. In other cases, the issue might describe functionality that the `dbt-core` maintainers are unwilling or unable to incorporate into the `dbt-core` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt-core` repository, the issue will be tagged with the `wontfix` label (see below) and closed.
In some cases, the right resolution to an open issue might be tangential to the `dbt` codebase. The right path forward might be a documentation update or a change that can be made in user-space. In other cases, the issue might describe functionality that the `dbt` maintainers are unwilling or unable to incorporate into the `dbt` codebase. When it is determined that an open issue describes functionality that will not translate to a code change in the `dbt` repository, the issue will be tagged with the `wontfix` label (see below) and closed.
### Using issue labels
The `dbt-core` maintainers use labels to categorize open issues. Most labels describe the domain in the `dbt-core` codebase germane to the discussion.
The `dbt` maintainers use labels to categorize open issues. Some labels indicate the databases impacted by the issue, while others describe the domain in the `dbt` codebase germane to the discussion. While most of these labels are self-explanatory (eg. `snowflake` or `bigquery`), there are others that are worth describing.
| tag | description |
| --- | ----------- |
| [triage](https://github.com/dbt-labs/dbt-core/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt-core` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/dbt-labs/dbt-core/labels/bug) | This issue represents a defect or regression in `dbt-core` |
| [enhancement](https://github.com/dbt-labs/dbt-core/labels/enhancement) | This issue represents net-new functionality in `dbt-core` |
| [good first issue](https://github.com/dbt-labs/dbt-core/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt-core` codebase to implement. This issue is appropriate for a first-time contributor. |
| [triage](https://github.com/dbt-labs/dbt-core/labels/triage) | This is a new issue which has not yet been reviewed by a `dbt` maintainer. This label is removed when a maintainer reviews and responds to the issue. |
| [bug](https://github.com/dbt-labs/dbt-core/labels/bug) | This issue represents a defect or regression in `dbt` |
| [enhancement](https://github.com/dbt-labs/dbt-core/labels/enhancement) | This issue represents net-new functionality in `dbt` |
| [good first issue](https://github.com/dbt-labs/dbt-core/labels/good%20first%20issue) | This issue does not require deep knowledge of the `dbt` codebase to implement. This issue is appropriate for a first-time contributor. |
| [help wanted](https://github.com/dbt-labs/dbt-core/labels/help%20wanted) / [discussion](https://github.com/dbt-labs/dbt-core/labels/discussion) | Conversation around this issue is ongoing, and there isn't yet a clear path forward. Input from community members is most welcome. |
| [duplicate](https://github.com/dbt-labs/dbt-core/issues/duplicate) | This issue is functionally identical to another open issue. The `dbt-core` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/dbt-labs/dbt-core/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt-core` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/dbt-labs/dbt-core/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt-core` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/dbt-labs/dbt-core/labels/wontfix) | This issue does not require a code change in the `dbt-core` repository, or the maintainers are unwilling/unable to merge a Pull Request which implements the behavior described in the issue. |
| [duplicate](https://github.com/dbt-labs/dbt-core/issues/duplicate) | This issue is functionally identical to another open issue. The `dbt` maintainers will close this issue and encourage community members to focus conversation on the other one. |
| [snoozed](https://github.com/dbt-labs/dbt-core/labels/snoozed) | This issue describes a good idea, but one which will probably not be addressed in a six-month time horizon. The `dbt` maintainers will revisit these issues periodically and re-prioritize them accordingly. |
| [stale](https://github.com/dbt-labs/dbt-core/labels/stale) | This is an old issue which has not recently been updated. Stale issues will periodically be closed by `dbt` maintainers, but they can be re-opened if the discussion is restarted. |
| [wontfix](https://github.com/dbt-labs/dbt-core/labels/wontfix) | This issue does not require a code change in the `dbt` repository, or the maintainers are unwilling/unable to merge a Pull Request which implements the behavior described in the issue. |
#### Branching Strategy
`dbt-core` has three types of branches:
`dbt` has three types of branches:
- **Trunks** are where active development of the next release takes place. There is one trunk named `main` at the time of writing this, and it will be the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt-core`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt-core`.
- **Trunks** are where active development of the next release takes place. There is one trunk, named `develop` at the time of writing, and it is the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk branch or a specific release branch.
## Getting the code
### Installing git
You will need `git` in order to download and modify the `dbt-core` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).
You will need `git` in order to download and modify the `dbt` source code. On macOS, the best way to download git is to just install [Xcode](https://developer.apple.com/support/xcode/).
### External contributors
If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt-core` by forking the `dbt-core` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt` by forking the `dbt` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
1. fork the `dbt-core` repository
1. fork the `dbt` repository
2. clone your fork locally
3. check out a new branch for your proposed changes
4. push changes to your fork
5. open a pull request against `dbt-labs/dbt` from your forked repository
### dbt Labs contributors
### Core contributors
If you are a member of the `dbt-labs` GitHub organization, you will have push access to the `dbt-core` repo. Rather than forking `dbt-core` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
If you are a member of the `dbt-labs` GitHub organization, you will have push access to the `dbt` repo. Rather than forking `dbt` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
## Setting up an environment
There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt-core` development, many of these tools are used commonly across open-source python projects.
There are some tools that will be helpful to you in developing locally. While this is the list relevant for `dbt` development, many of these tools are used commonly across open-source python projects.
### Tools
A short list of tools used in `dbt-core` testing that will be helpful to your understanding:
A short list of tools used in `dbt` testing that will be helpful to your understanding:
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.7, Python 3.8, and Python 3.9
- [`tox`](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions. We currently target the latest patch releases for Python 3.6, Python 3.7, Python 3.8, and Python 3.9
- [`pytest`](https://docs.pytest.org/en/latest/) to discover/run tests
- [`make`](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [`flake8`](https://flake8.pycqa.org/en/latest/) for code linting
- [`mypy`](https://mypy.readthedocs.io/en/stable/) for static type checking
- [Github Actions](https://github.com/features/actions)
A deep understanding of these tools is not required to effectively contribute to `dbt-core`, but we recommend checking out the attached documentation if you're interested in learning more about them.
A deep understanding of these tools is not required to effectively contribute to `dbt`, but we recommend checking out the attached documentation if you're interested in learning more about them.
#### virtual environments
We strongly recommend using virtual environments when developing code in `dbt-core`. We recommend creating this virtualenv
in the root of the `dbt-core` repository. To create a new virtualenv, run:
We strongly recommend using virtual environments when developing code in `dbt`. We recommend creating this virtualenv
in the root of the `dbt` repository. To create a new virtualenv, run:
```sh
python3 -m venv env
source env/bin/activate
@@ -135,11 +135,11 @@ For testing, and later in the examples in this document, you may want to have `p
brew install postgresql
```
## Running `dbt-core` in development
## Running `dbt` in development
### Installation
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt-core` (and its dependencies) with:
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt` (and its dependencies) with:
```sh
make dev
@@ -147,24 +147,23 @@ make dev
pip install -r dev-requirements.txt -r editable-requirements.txt
```
When `dbt-core` is installed this way, any changes you make to the `dbt-core` source code will be reflected immediately in your next `dbt-core` run.
When `dbt` is installed this way, any changes you make to the `dbt` source code will be reflected immediately in your next `dbt` run.
### Running `dbt`
### Running `dbt-core`
With your virtualenv activated, the `dbt-core` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as necessary to connect to your target databases. It may be a good idea to add a new profile pointing to a local postgres instance, or a specific test sandbox within your data warehouse if appropriate.
## Testing
Getting the `dbt-core` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt-core`. The section that follows outlines some helpful tips for setting up the test environment.
Getting the `dbt` integration tests set up in your local environment will be very helpful as you start to make changes to your local version of `dbt`. The section that follows outlines some helpful tips for setting up the test environment.
Although `dbt-core` works with a number of different databases, you won't need to supply credentials for every one of these databases in your test environment. Instead you can test all dbt-core code changes with Python and Postgres.
Although `dbt` works with a number of different databases, you won't need to supply credentials for every one of these databases in your test environment. Instead you can test all dbt-core code changes with Python and Postgres.
### Initial setup
We recommend starting with `dbt-core`'s Postgres tests. These tests cover most of the functionality in `dbt-core`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
We recommend starting with `dbt`'s Postgres tests. These tests cover most of the functionality in `dbt`, are the fastest to run, and are the easiest to set up. To run the Postgres integration tests, you'll have to do one extra step of setting up the test database:
```sh
make setup-db
@@ -175,6 +174,15 @@ docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```
`dbt` uses test credentials specified in a `test.env` file in the root of the repository for non-Postgres databases. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against `dbt`. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials. This step is only required to use non-Postgres databases.
```
cp test.env.sample test.env
$EDITOR test.env
```
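For orientation, a `test.env` file is a flat list of `KEY=value` lines. The keys below are hypothetical stand-ins for illustration only; the authoritative key names are the ones already listed in `test.env.sample`:

```sh
# Hypothetical entries for illustration only -- copy the real key names
# from test.env.sample, and never commit this file.
SNOWFLAKE_TEST_ACCOUNT=...
BIGQUERY_TEST_PROJECT=...
```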
> In general, it's most important to have successful unit and Postgres tests. Once you open a PR, `dbt` will automatically run integration tests for the other three core database adapters. Of course, if you are a BigQuery user, contributing a BigQuery-only feature, it's important to run BigQuery tests as well.
### Test commands
There are a few methods for running tests locally.
@@ -200,9 +208,9 @@ suites.
[`tox`](https://tox.readthedocs.io/en/latest/) takes care of managing virtualenvs and installing dependencies in order to run
tests. You can also run tests in parallel, for example, you can run unit tests
for Python 3.7, Python 3.8, Python 3.9, `flake8` checks, and `mypy` checks in
for Python 3.6, Python 3.7, Python 3.8, `flake8` checks, and `mypy` checks in
parallel with `tox -p`. Also, you can run unit tests for specific python versions
with `tox -e py37`. The configuration for these tests is located in `tox.ini`.
with `tox -e py36`. The configuration for these tests is located in `tox.ini`.
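Concretely, the invocations described above look like this (the exact environment names are defined in `tox.ini`):

```sh
tox -p          # run the configured unit test, flake8, and mypy environments in parallel
tox -e py37     # run unit tests against a single Python version
```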
#### `pytest`
@@ -222,8 +230,6 @@ python -m pytest test/unit/test_graph.py::GraphTest::test__dependency_list
dbt Labs provides a CI environment to test changes to specific adapters, and periodic maintenance checks of `dbt-core` through Github Actions. For example, if you submit a pull request to the `dbt-redshift` repo, GitHub will trigger automated code checks and tests against Redshift.
A `dbt-core` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
- First-time contributors should note that code checks and unit tests require a maintainer's approval before they run.
A `dbt` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
Once all tests are passing and your PR has been approved, a `dbt-core` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:
Once all tests are passing and your PR has been approved, a `dbt` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:

View File

@@ -1,8 +1,3 @@
##
# This dockerfile is used for local development and adapter testing only.
# See `/docker` for a generic and production-ready docker file
##
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND noninteractive

View File

@@ -1,42 +0,0 @@
<p align="center">
<img src="https://raw.githubusercontent.com/dbt-labs/dbt-core/fa1ea14ddfb1d5ae319d5141844910dd53ab2834/etc/dbt-core.svg" alt="dbt logo" width="750"/>
</p>
<p align="center">
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/main.yml/badge.svg?event=push" alt="Unit Tests Badge"/>
</a>
<a href="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml">
<img src="https://github.com/dbt-labs/dbt-core/actions/workflows/integration.yml/badge.svg?event=push" alt="Integration Tests Badge"/>
</a>
</p>
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
![architecture](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-arch.png)
## Understanding dbt
Analysts using dbt can transform their data by simply writing select statements, while dbt handles turning these statements into tables and views in a data warehouse.
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
![dbt dag](https://raw.githubusercontent.com/dbt-labs/dbt-core/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-dag.png)
## Getting started
- [Install dbt](https://docs.getdbt.com/docs/installation)
- Read the [introduction](https://docs.getdbt.com/docs/introduction/) and [viewpoint](https://docs.getdbt.com/docs/about/viewpoint/)
## Join the dbt Community
- Be part of the conversation in the [dbt Community Slack](http://community.getdbt.com/)
- Read more on the [dbt Community Discourse](https://discourse.getdbt.com)
## Reporting bugs and contributing code
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-core/issues/new)
- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md)
## Code of Conduct
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct).

View File

@@ -1,52 +0,0 @@
# core/dbt directory README
## The following are individual files in this directory.
### deprecations.py
### flags.py
### main.py
### tracking.py
### version.py
### lib.py
### node_types.py
### helper_types.py
### links.py
### semver.py
### ui.py
### compilation.py
### dataclass_schema.py
### exceptions.py
### hooks.py
### logger.py
### profiler.py
### utils.py
## The subdirectories will be documented in a README in the subdirectory
* config
* include
* adapters
* context
* deps
* graph
* task
* clients
* events

View File

@@ -1 +0,0 @@
# Adapters README

View File

@@ -18,17 +18,7 @@ from dbt.contracts.graph.manifest import Manifest
from dbt.adapters.base.query_headers import (
MacroQueryStringSetter,
)
from dbt.events.functions import fire_event
from dbt.events.types import (
NewConnection,
ConnectionReused,
ConnectionLeftOpen,
ConnectionLeftOpen2,
ConnectionClosed,
ConnectionClosed2,
Rollback,
RollbackFailed
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import flags
@@ -146,10 +136,14 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
if conn.name == conn_name and conn.state == 'open':
return conn
fire_event(NewConnection(conn_name=conn_name, conn_type=self.TYPE))
logger.debug(
'Acquiring new {} connection "{}".'.format(self.TYPE, conn_name))
if conn.state == 'open':
fire_event(ConnectionReused(conn_name=conn_name))
logger.debug(
'Re-using an available connection from the pool (formerly {}).'
.format(conn.name)
)
else:
conn.handle = LazyHandle(self.open)
@@ -196,9 +190,11 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
with self.lock:
for connection in self.thread_connections.values():
if connection.state not in {'closed', 'init'}:
fire_event(ConnectionLeftOpen(conn_name=connection.name))
logger.debug("Connection '{}' was left open."
.format(connection.name))
else:
fire_event(ConnectionClosed(conn_name=connection.name))
logger.debug("Connection '{}' was properly closed."
.format(connection.name))
self.close(connection)
# garbage collect these connections
@@ -224,17 +220,20 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
try:
connection.handle.rollback()
except Exception:
fire_event(RollbackFailed(conn_name=connection.name))
logger.debug(
'Failed to rollback {}'.format(connection.name),
exc_info=True
)
@classmethod
def _close_handle(cls, connection: Connection) -> None:
"""Perform the actual close operation."""
# On windows, sometimes connection handles don't have a close() attr.
if hasattr(connection.handle, 'close'):
fire_event(ConnectionClosed2(conn_name=connection.name))
logger.debug(f'On {connection.name}: Close')
connection.handle.close()
else:
fire_event(ConnectionLeftOpen2(conn_name=connection.name))
logger.debug(f'On {connection.name}: No close available on handle')
@classmethod
def _rollback(cls, connection: Connection) -> None:
@@ -245,7 +244,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
f'"{connection.name}", but it does not have one open!'
)
fire_event(Rollback(conn_name=connection.name))
logger.debug(f'On {connection.name}: ROLLBACK')
cls._rollback_handle(connection)
connection.transaction_open = False
@@ -257,7 +256,7 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
return connection
if connection.transaction_open and connection.handle:
fire_event(Rollback(conn_name=connection.name))
logger.debug('On {}: ROLLBACK'.format(connection.name))
cls._rollback_handle(connection)
connection.transaction_open = False
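Every hunk in this file follows the same mechanical pattern: a string-formatted `logger.debug` call is replaced by firing a typed event. A minimal sketch of one such call site, reconstructed from the hunks above (it assumes a checkout of this branch, since `dbt.events.types` is what these commits introduce):

```python
from dbt.events.functions import fire_event
from dbt.events.types import NewConnection

def acquire_connection(conn_type: str, conn_name: str) -> None:
    # before: logger.debug('Acquiring new {} connection "{}".'.format(conn_type, conn_name))
    # after: fire a structured event; the message format lives with the event type
    fire_event(NewConnection(conn_name=conn_name, conn_type=conn_type))
```

The payoff is that every message becomes a typed value that the logging layer can render as text or JSON without the call site changing.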

View File

@@ -29,8 +29,7 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.manifest import Manifest, MacroManifest
from dbt.contracts.graph.parsed import ParsedSeedNode
from dbt.exceptions import warn_or_error
from dbt.events.functions import fire_event
from dbt.events.types import CacheMiss, ListRelations
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import filter_null_values, executor
from dbt.adapters.base.connections import Connection, AdapterResponse
@@ -39,7 +38,7 @@ from dbt.adapters.base.relation import (
ComponentName, BaseRelation, InformationSchema, SchemaSearchMap
)
from dbt.adapters.base import Column as BaseColumn
from dbt.adapters.cache import RelationsCache, _make_key
from dbt.adapters.cache import RelationsCache
SeedModel = Union[ParsedSeedNode, CompiledSeedNode]
@@ -289,12 +288,9 @@ class BaseAdapter(metaclass=AdapterMeta):
"""Check if the schema is cached, and by default logs if it is not."""
if (database, schema) not in self.cache:
fire_event(
CacheMiss(
conn_name=self.nice_connection_name(),
database=database,
schema=schema
)
logger.debug(
'On "{}": cache miss for schema "{}.{}", this is inefficient'
.format(self.nice_connection_name(), database, schema)
)
return False
else:
@@ -676,12 +672,9 @@ class BaseAdapter(metaclass=AdapterMeta):
relations = self.list_relations_without_caching(
schema_relation
)
fire_event(ListRelations(
database=database,
schema=schema,
relations=[_make_key(x) for x in relations]
))
logger.debug('with database={}, schema={}, relations={}'
.format(database, schema, relations))
return relations
def _make_match_kwargs(
@@ -975,10 +968,10 @@ class BaseAdapter(metaclass=AdapterMeta):
'dbt could not find a macro with the name "{}" in {}'
.format(macro_name, package_name)
)
# This causes a reference cycle, as generate_runtime_macro_context()
# This causes a reference cycle, as generate_runtime_macro()
# ends up calling get_adapter, so the import has to be here.
from dbt.context.providers import generate_runtime_macro_context
macro_context = generate_runtime_macro_context(
from dbt.context.providers import generate_runtime_macro
macro_context = generate_runtime_macro(
macro=macro,
config=self.config,
manifest=manifest,

View File

@@ -89,10 +89,7 @@ class BaseRelation(FakeAPIObject, Hashable):
if not self._is_exactish_match(k, v):
exact_match = False
if (
self.path.get_lowered_part(k).strip(self.quote_character) !=
v.lower().strip(self.quote_character)
):
if self.path.get_lowered_part(k) != v.lower():
approximate_match = False
if approximate_match and not exact_match:

View File

@@ -1,27 +1,23 @@
import threading
from collections import namedtuple
from copy import deepcopy
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
from typing import List, Iterable, Optional, Dict, Set, Tuple, Any
import threading
from dbt.adapters.reference_keys import _make_key, _ReferenceKey
import dbt.exceptions
from dbt.events.functions import fire_event
from dbt.events.types import (
AddLink,
AddRelation,
DropCascade,
DropMissingRelation,
DropRelation,
DumpAfterAddGraph,
DumpAfterRenameSchema,
DumpBeforeAddGraph,
DumpBeforeRenameSchema,
RenameSchema,
TemporaryRelation,
UncachedRelation,
UpdateReference
)
from dbt.logger import CACHE_LOGGER as logger
from dbt.utils import lowercase
from dbt.helper_types import Lazy
import dbt.exceptions
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
def _make_key(relation) -> _ReferenceKey:
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
to keep track of quoting
"""
# databases and schemas can both be None
return _ReferenceKey(lowercase(relation.database),
lowercase(relation.schema),
lowercase(relation.identifier))
def dot_separated(key: _ReferenceKey) -> str:
@@ -161,6 +157,12 @@ class _CachedRelation:
return [dot_separated(r) for r in self.referenced_by]
def lazy_log(msg, func):
if logger.disabled:
return
logger.debug(msg.format(func()))
class RelationsCache:
"""A cache of the relations known to dbt. Keeps track of relationships
declared between tables and handles renames/drops as a real database would.
@@ -276,7 +278,6 @@ class RelationsCache:
referenced.add_reference(dependent)
# TODO: Is this dead code? I can't seem to find it grepping the codebase.
def add_link(self, referenced, dependent):
"""Add a link between two relations to the database. If either relation
does not exist, it will be added as an "external" relation.
@@ -292,12 +293,15 @@ class RelationsCache:
:raises InternalError: If either entry does not exist.
"""
ref_key = _make_key(referenced)
dep_key = _make_key(dependent)
if (ref_key.database, ref_key.schema) not in self:
# if we have not cached the referenced schema at all, we must be
# referring to a table outside our control. There's no need to make
# a link - we will never drop the referenced relation during a run.
fire_event(UncachedRelation(dep_key=dep_key, ref_key=ref_key))
logger.debug(
'{dep!s} references {ref!s} but {ref.database}.{ref.schema} '
'is not in the cache, skipping assumed external relation'
.format(dep=dependent, ref=ref_key)
)
return
if ref_key not in self.relations:
# Insert a dummy "external" relation.
@@ -305,13 +309,17 @@ class RelationsCache:
type=referenced.External
)
self.add(referenced)
dep_key = _make_key(dependent)
if dep_key not in self.relations:
# Insert a dummy "external" relation.
dependent = dependent.replace(
type=referenced.External
)
self.add(dependent)
fire_event(AddLink(dep_key=dep_key, ref_key=ref_key))
logger.debug(
'adding link, {!s} references {!s}'.format(dep_key, ref_key)
)
with self.lock:
self._add_link(ref_key, dep_key)
@@ -322,12 +330,14 @@ class RelationsCache:
:param BaseRelation relation: The underlying relation.
"""
cached = _CachedRelation(relation)
fire_event(AddRelation(relation=_make_key(cached)))
fire_event(DumpBeforeAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))
logger.debug('Adding relation: {!s}'.format(cached))
lazy_log('before adding: {!s}', self.dump_graph)
with self.lock:
self._setdefault(cached)
fire_event(DumpAfterAddGraph(dump=Lazy.defer(lambda: self.dump_graph())))
lazy_log('after adding: {!s}', self.dump_graph)
def _remove_refs(self, keys):
"""Removes all references to all entries in keys. This does not
@@ -342,17 +352,20 @@ class RelationsCache:
for cached in self.relations.values():
cached.release_references(keys)
def _drop_cascade_relation(self, dropped_key):
def _drop_cascade_relation(self, dropped):
"""Drop the given relation and cascade it appropriately to all
dependent relations.
:param _CachedRelation dropped: An existing _CachedRelation to drop.
"""
if dropped_key not in self.relations:
fire_event(DropMissingRelation(relation=dropped_key))
if dropped not in self.relations:
logger.debug('dropped a nonexistent relationship: {!s}'
.format(dropped))
return
consequences = self.relations[dropped_key].collect_consequences()
fire_event(DropCascade(dropped=dropped_key, consequences=consequences))
consequences = self.relations[dropped].collect_consequences()
logger.debug(
'drop {} is cascading to {}'.format(dropped, consequences)
)
self._remove_refs(consequences)
def drop(self, relation):
@@ -366,10 +379,10 @@ class RelationsCache:
:param str schema: The schema of the relation to drop.
:param str identifier: The identifier of the relation to drop.
"""
dropped_key = _make_key(relation)
fire_event(DropRelation(dropped=dropped_key))
dropped = _make_key(relation)
logger.debug('Dropping relation: {!s}'.format(dropped))
with self.lock:
self._drop_cascade_relation(dropped_key)
self._drop_cascade_relation(dropped)
def _rename_relation(self, old_key, new_relation):
"""Rename a relation named old_key to new_key, updating references.
@@ -390,8 +403,9 @@ class RelationsCache:
# update all the relations that refer to it
for cached in self.relations.values():
if cached.is_referenced_by(old_key):
fire_event(
UpdateReference(old_key=old_key, new_key=new_key, cached_key=cached.key())
logger.debug(
'updated reference from {0} -> {2} to {1} -> {2}'
.format(old_key, new_key, cached.key())
)
cached.rename_key(old_key, new_key)
@@ -421,7 +435,10 @@ class RelationsCache:
)
if old_key not in self.relations:
fire_event(TemporaryRelation(key=old_key))
logger.debug(
'old key {} not found in self.relations, assuming temporary'
.format(old_key)
)
return False
return True
@@ -439,9 +456,11 @@ class RelationsCache:
"""
old_key = _make_key(old)
new_key = _make_key(new)
fire_event(RenameSchema(old_key=old_key, new_key=new_key))
logger.debug('Renaming relation {!s} to {!s}'.format(
old_key, new_key
))
fire_event(DumpBeforeRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))
lazy_log('before rename: {!s}', self.dump_graph)
with self.lock:
if self._check_rename_constraints(old_key, new_key):
@@ -449,7 +468,7 @@ class RelationsCache:
else:
self._setdefault(_CachedRelation(new))
fire_event(DumpAfterRenameSchema(dump=Lazy.defer(lambda: self.dump_graph())))
lazy_log('after rename: {!s}', self.dump_graph)
def get_relations(
self, database: Optional[str], schema: Optional[str]
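One detail worth pausing on in this file: `dump_graph()` is expensive, so both sides of the diff defer it until the payload is actually wanted, the right-hand side via the `lazy_log` helper and the left-hand side via `Lazy.defer`. A generic sketch of the same guard, using the stdlib's `isEnabledFor` in place of the logger's `disabled` flag:

```python
import logging
from typing import Callable

logger = logging.getLogger("dbt.cache")

def lazy_log(msg: str, func: Callable[[], object]) -> None:
    # only pay for func() when a DEBUG record would actually be emitted
    if not logger.isEnabledFor(logging.DEBUG):
        return
    logger.debug(msg.format(func()))

lazy_log("before adding: {!s}", lambda: {"relations": 3})  # dict built only if DEBUG is on
```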

View File

@@ -8,9 +8,10 @@ from dbt.include.global_project import (
PACKAGE_PATH as GLOBAL_PROJECT_PATH,
PROJECT_NAME as GLOBAL_PROJECT_NAME,
)
from dbt.events.functions import fire_event
from dbt.events.types import AdapterImportError, PluginLoadError
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.contracts.connection import Credentials, AdapterRequiredConfig
from dbt.adapters.protocol import (
AdapterProtocol,
AdapterConfig,
@@ -66,12 +67,11 @@ class AdapterContainer:
# if we failed to import the target module in particular, inform
# the user about it via a runtime error
if exc.name == 'dbt.adapters.' + name:
fire_event(AdapterImportError(exc=exc))
raise RuntimeException(f'Could not find adapter type {name}!')
logger.info(f'Error importing adapter: {exc}')
# otherwise, the error had to have come from some underlying
# library. Log the stack trace.
fire_event(PluginLoadError())
logger.debug('', exc_info=True)
raise
plugin: AdapterPlugin = mod.Plugin
plugin_type = plugin.adapter.type()

View File

@@ -1,24 +0,0 @@
# this module exists to resolve circular imports with the events module
from collections import namedtuple
from typing import Optional
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
def lowercase(value: Optional[str]) -> Optional[str]:
if value is None:
return None
else:
return value.lower()
def _make_key(relation) -> _ReferenceKey:
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
to keep track of quoting
"""
# databases and schemas can both be None
return _ReferenceKey(lowercase(relation.database),
lowercase(relation.schema),
lowercase(relation.identifier))
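To make the lowercasing concrete, a quick probe of this module (using `SimpleNamespace` as a stand-in for a relation object, and assuming a checkout of the side of this diff where `dbt.adapters.reference_keys` exists):

```python
from types import SimpleNamespace
from dbt.adapters.reference_keys import _make_key  # the module shown above

rel = SimpleNamespace(database='Analytics', schema='DBT_Test', identifier='My_Table')
print(_make_key(rel))
# _ReferenceKey(database='analytics', schema='dbt_test', identifier='my_table')
```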

View File

@@ -10,8 +10,7 @@ from dbt.adapters.base import BaseConnectionManager
from dbt.contracts.connection import (
Connection, ConnectionState, AdapterResponse
)
from dbt.events.functions import fire_event
from dbt.events.types import ConnectionUsed, SQLQuery, SQLCommit, SQLQueryStatus
from dbt.logger import GLOBAL_LOGGER as logger
class SQLConnectionManager(BaseConnectionManager):
@@ -59,7 +58,9 @@ class SQLConnectionManager(BaseConnectionManager):
connection = self.get_thread_connection()
if auto_begin and connection.transaction_open is False:
self.begin()
fire_event(ConnectionUsed(conn_type=self.TYPE, conn_name=connection.name))
logger.debug('Using {} connection "{}".'
.format(self.TYPE, connection.name))
with self.exception_handler(sql):
if abridge_sql_log:
@@ -67,17 +68,19 @@ class SQLConnectionManager(BaseConnectionManager):
else:
log_sql = sql
fire_event(SQLQuery(conn_name=connection.name, sql=log_sql))
logger.debug(
'On {connection_name}: {sql}',
connection_name=connection.name,
sql=log_sql,
)
pre = time.time()
cursor = connection.handle.cursor()
cursor.execute(sql, bindings)
fire_event(
SQLQueryStatus(
status=str(self.get_response(cursor)),
elapsed=round((time.time() - pre), 2)
)
logger.debug(
"SQL status: {status} in {elapsed:0.2f} seconds",
status=self.get_response(cursor),
elapsed=(time.time() - pre)
)
return connection, cursor
@@ -157,7 +160,7 @@ class SQLConnectionManager(BaseConnectionManager):
'Tried to commit transaction on connection "{}", but '
'it does not have one open!'.format(connection.name))
fire_event(SQLCommit(conn_name=connection.name))
logger.debug('On {}: COMMIT'.format(connection.name))
self.add_commit_query()
connection.transaction_open = False

View File

@@ -5,11 +5,8 @@ import dbt.clients.agate_helper
from dbt.contracts.connection import Connection
import dbt.exceptions
from dbt.adapters.base import BaseAdapter, available
from dbt.adapters.cache import _make_key
from dbt.adapters.sql import SQLConnectionManager
from dbt.events.functions import fire_event
from dbt.events.types import ColTypeChange, SchemaCreation, SchemaDrop
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.adapters.base.relation import BaseRelation
@@ -119,13 +116,8 @@ class SQLAdapter(BaseAdapter):
target_column.can_expand_to(reference_column):
col_string_size = reference_column.string_size()
new_type = self.Column.string_type(col_string_size)
fire_event(
ColTypeChange(
orig_type=target_column.data_type,
new_type=new_type,
table=_make_key(current),
)
)
logger.debug("Changing col type from {} to {} in table {}",
target_column.data_type, new_type, current)
self.alter_column_type(current, column_name, new_type)
@@ -183,7 +175,7 @@ class SQLAdapter(BaseAdapter):
def create_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaCreation(relation=_make_key(relation)))
logger.debug('Creating schema "{}"', relation)
kwargs = {
'relation': relation,
}
@@ -194,7 +186,7 @@ class SQLAdapter(BaseAdapter):
def drop_schema(self, relation: BaseRelation) -> None:
relation = relation.without_identifier()
fire_event(SchemaDrop(relation=_make_key(relation)))
logger.debug('Dropping schema "{}".', relation)
kwargs = {
'relation': relation,
}

View File

@@ -1 +0,0 @@
# Clients README

View File

@@ -13,18 +13,6 @@ from dbt.exceptions import RuntimeException
BOM = BOM_UTF8.decode('utf-8') # '\ufeff'
class Number(agate.data_types.Number):
# undo the change in https://github.com/wireservice/agate/pull/733
# i.e. do not cast True and False to numeric 1 and 0
def cast(self, d):
if type(d) == bool:
raise agate.exceptions.CastError(
'Do not cast True to 1 or False to 0.'
)
else:
return super().cast(d)
class ISODateTime(agate.data_types.DateTime):
def cast(self, d):
# this is agate.data_types.DateTime.cast with the "clever" bits removed
@@ -53,7 +41,7 @@ def build_type_tester(
) -> agate.TypeTester:
types = [
Number(null_values=('null', '')),
agate.data_types.Number(null_values=('null', '')),
agate.data_types.Date(null_values=('null', ''),
date_format='%Y-%m-%d'),
agate.data_types.DateTime(null_values=('null', ''),

View File

@@ -8,10 +8,7 @@ from dbt.events.types import (
GitProgressUpdatingExistingDependency, GitProgressPullingNewDependency,
GitNothingToDo, GitProgressUpdatedCheckoutRange, GitProgressCheckedOutAt
)
from dbt.exceptions import (
CommandResultError, RuntimeException, bad_package_spec, raise_git_cloning_error,
raise_git_cloning_problem
)
import dbt.exceptions
from packaging import version
@@ -20,16 +17,6 @@ def _is_commit(revision: str) -> bool:
return bool(re.match(r"\b[0-9a-f]{40}\b", revision))
def _raise_git_cloning_error(repo, revision, error):
stderr = error.stderr.decode('utf-8').strip()
if 'usage: git' in stderr:
stderr = stderr.split('\nusage: git')[0]
if re.match("fatal: destination path '(.+)' already exists", stderr):
raise_git_cloning_error(error)
bad_package_spec(repo, revision, stderr)
def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirectory=None):
has_revision = revision is not None
is_commit = _is_commit(revision or "")
@@ -54,18 +41,10 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False, revision=None, subdirec
if dirname is not None:
clone_cmd.append(dirname)
try:
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
except CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
if subdirectory:
cwd_subdir = os.path.join(cwd, dirname or '')
clone_cmd_subdir = ['git', 'sparse-checkout', 'set', subdirectory]
try:
run_cmd(cwd_subdir, clone_cmd_subdir)
except CommandResultError as exc:
_raise_git_cloning_error(repo, revision, exc)
run_cmd(os.path.join(cwd, dirname or ''), ['git', 'sparse-checkout', 'set', subdirectory])
if remove_git_dir:
rmdir(os.path.join(dirname, '.git'))
@@ -108,9 +87,9 @@ def checkout(cwd, repo, revision=None):
revision = 'HEAD'
try:
return _checkout(cwd, repo, revision)
except CommandResultError as exc:
except dbt.exceptions.CommandResultError as exc:
stderr = exc.stderr.decode('utf-8').strip()
bad_package_spec(repo, revision, stderr)
dbt.exceptions.bad_package_spec(repo, revision, stderr)
def get_current_sha(cwd):
@@ -134,11 +113,11 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
remove_git_dir=remove_git_dir,
subdirectory=subdirectory,
)
except CommandResultError as exc:
except dbt.exceptions.CommandResultError as exc:
err = exc.stderr.decode('utf-8')
exists = re.match("fatal: destination path '(.+)' already exists", err)
if not exists:
raise_git_cloning_problem(repo)
if not exists: # something else is wrong, raise it
raise
directory = None
start_sha = None
@@ -148,7 +127,7 @@ def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
else:
matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
if matches is None:
raise RuntimeException(
raise dbt.exceptions.RuntimeException(
f'Error cloning {repo} - never saw "Cloning into ..." from git'
)
directory = matches.group(1)
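For clarity, the `_is_commit` check near the top of this file accepts only a full 40-character hex string; branch names and abbreviated SHAs fall through to normal ref handling. A quick probe:

```python
import re

def _is_commit(revision: str) -> bool:
    # a full 40-character SHA-1; nothing shorter, no branch or tag names
    return bool(re.match(r"\b[0-9a-f]{40}\b", revision))

print(_is_commit('1e2bcef0ac' + '0' * 30))  # True: 40 hex characters
print(_is_commit('main'))                   # False: branch name
print(_is_commit('1e2bcef'))                # False: abbreviated SHA
```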

View File

@@ -21,7 +21,7 @@ import jinja2.sandbox
from dbt.utils import (
get_dbt_macro_name, get_docs_macro_name, get_materialization_macro_name,
get_test_macro_name, deep_map_render
get_test_macro_name, deep_map
)
from dbt.clients._jinja_blocks import BlockIterator, BlockData, BlockTag
@@ -660,7 +660,5 @@ def add_rendered_test_kwargs(
return value
# The test_metadata.kwargs come from the test builder, and were set
# when the test node was created in _parse_generic_test.
kwargs = deep_map_render(_convert_function, node.test_metadata.kwargs)
kwargs = deep_map(_convert_function, node.test_metadata.kwargs)
context[GENERIC_TEST_KWARGS_NAME] = kwargs

View File

@@ -33,15 +33,6 @@ def _get(path, registry_base_url=None):
resp = requests.get(url, timeout=30)
fire_event(RegistryProgressGETResponse(url=url, resp_code=resp.status_code))
resp.raise_for_status()
# It is unexpected for the content of the response to be None so if it is, raising this error
# will cause this function to retry (if called within _get_with_retries) and hopefully get
# a response. This seems to happen when there's an issue with the Hub.
# See https://github.com/dbt-labs/dbt-core/issues/4577
if resp.json() is None:
raise requests.exceptions.ContentDecodingError(
'Request error: The response is None', response=resp
)
return resp.json()

View File

@@ -441,7 +441,7 @@ def run_cmd(
fire_event(SystemStdErrMsg(bmsg=err))
if proc.returncode != 0:
fire_event(SystemReportReturnCode(returncode=proc.returncode))
fire_event(SystemReportReturnCode(code=proc.returncode))
raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
out, err)
@@ -485,7 +485,7 @@ def untar_package(
) -> None:
tar_path = convert_path(tar_path)
tar_dir_name = None
with tarfile.open(tar_path, 'r:gz') as tarball:
with tarfile.open(tar_path, 'r') as tarball:
tarball.extractall(dest_dir)
tar_dir_name = os.path.commonprefix(tarball.getnames())
if rename_to:

View File

@@ -3,14 +3,13 @@ from collections import defaultdict
from typing import List, Dict, Any, Tuple, cast, Optional
import networkx as nx # type: ignore
import pickle
import sqlparse
from dbt import flags
from dbt.adapters.factory import get_adapter
from dbt.clients import jinja
from dbt.clients.system import make_directory
from dbt.context.providers import generate_runtime_model_context
from dbt.context.providers import generate_runtime_model
from dbt.contracts.graph.manifest import Manifest, UniqueID
from dbt.contracts.graph.compiled import (
COMPILED_TYPES,
@@ -27,10 +26,9 @@ from dbt.exceptions import (
RuntimeException,
)
from dbt.graph import Graph
from dbt.events.functions import fire_event
from dbt.events.types import FoundStats, CompilingNode, WritingInjectedSQLForNode
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.events.format import pluralize
from dbt.utils import pluralize
import dbt.tracking
graph_file_name = 'graph.gpickle'
@@ -55,7 +53,6 @@ def print_compile_stats(stats):
NodeType.Seed: 'seed file',
NodeType.Source: 'source',
NodeType.Exposure: 'exposure',
NodeType.Metric: 'metric'
}
results = {k: 0 for k in names.keys()}
@@ -71,7 +68,7 @@ def print_compile_stats(stats):
if t in names
])
fire_event(FoundStats(stat_line=stat_line))
logger.info("Found {}".format(stat_line))
def _node_enabled(node: ManifestNode):
@@ -92,8 +89,6 @@ def _generate_stats(manifest: Manifest):
stats[source.resource_type] += 1
for exposure in manifest.exposures.values():
stats[exposure.resource_type] += 1
for metric in manifest.metrics.values():
stats[metric.resource_type] += 1
for macro in manifest.macros.values():
stats[macro.resource_type] += 1
return stats
@@ -163,8 +158,7 @@ class Linker:
for node_id in self.graph:
data = manifest.expect(node_id).to_dict(omit_none=True)
out_graph.add_node(node_id, **data)
with open(outfile, 'wb') as outfh:
pickle.dump(out_graph, outfh, protocol=pickle.HIGHEST_PROTOCOL)
nx.write_gpickle(out_graph, outfile)
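The graph-writing change in this hunk replaces networkx's `write_gpickle` helper with a direct `pickle.dump` at the highest protocol; both produce the same `graph.gpickle` artifact. A sketch of the two forms side by side:

```python
import pickle
import networkx as nx

out_graph = nx.DiGraph()
out_graph.add_node('model.my_project.my_model')  # hypothetical node id

# one side of the hunk: write the pickle directly
with open('graph.gpickle', 'wb') as outfh:
    pickle.dump(out_graph, outfh, protocol=pickle.HIGHEST_PROTOCOL)

# the other side: nx.write_gpickle(out_graph, 'graph.gpickle') does the same,
# though that helper was later deprecated upstream
```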
class Compiler:
@@ -184,7 +178,7 @@ class Compiler:
extra_context: Dict[str, Any],
) -> Dict[str, Any]:
context = generate_runtime_model_context(
context = generate_runtime_model(
node, self.config, manifest
)
context.update(extra_context)
@@ -372,7 +366,7 @@ class Compiler:
if extra_context is None:
extra_context = {}
fire_event(CompilingNode(unique_id=node.unique_id))
logger.debug("Compiling {}".format(node.unique_id))
data = node.to_dict(omit_none=True)
data.update({
@@ -424,44 +418,42 @@ class Compiler:
else:
dependency_not_found(node, dependency)
def link_graph(self, linker: Linker, manifest: Manifest, add_test_edges: bool = False):
def link_graph(self, linker: Linker, manifest: Manifest):
for source in manifest.sources.values():
linker.add_node(source.unique_id)
for node in manifest.nodes.values():
self.link_node(linker, node, manifest)
for exposure in manifest.exposures.values():
self.link_node(linker, exposure, manifest)
for metric in manifest.metrics.values():
self.link_node(linker, metric, manifest)
cycle = linker.find_cycles()
if cycle:
raise RuntimeError("Found a cycle: {}".format(cycle))
if add_test_edges:
manifest.build_parent_and_child_maps()
self.add_test_edges(linker, manifest)
manifest.build_parent_and_child_maps()
def add_test_edges(self, linker: Linker, manifest: Manifest) -> None:
self.resolve_graph(linker, manifest)
def resolve_graph(self, linker: Linker, manifest: Manifest) -> None:
""" This method adds additional edges to the DAG. For a given non-test
executable node, add an edge from an upstream test to the given node if
the set of nodes the test depends on is a subset of the upstream nodes
for the given node. """
the set of nodes the test depends on is a proper/strict subset of the
upstream nodes for the given node. """
# Given a graph:
#   model1 --> model2 --> model3
#     |           |
#     |           \/
#     \/        test2
#   test1
#
# Produce the following graph:
#   model1 --> model2 --> model3
#     |     /\    |     /\  /\
#     |     |     \/    |   |
#     \/    |   test2 --|   |
#   test1 --|---------------|
for node_id in linker.graph:
# If node is executable (in manifest.nodes) and does _not_
@@ -499,19 +491,21 @@ class Compiler:
)
# If the set of nodes that an upstream test depends on
# is a subset of all upstream nodes of the current node,
# add an edge from the upstream test to the current node.
if (test_depends_on.issubset(upstream_nodes)):
# is a proper (or strict) subset of all upstream nodes of
# the current node, add an edge from the upstream test
# to the current node. Must be a proper/strict subset to
# avoid adding a circular dependency to the graph.
if (test_depends_on < upstream_nodes):
linker.graph.add_edge(
upstream_test,
node_id
)
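The switch from `issubset` to `<` is the heart of this hunk: a proper subset excludes equality, which is what prevents the circular edge described in the comment. A two-line illustration:

```python
a = {'model1', 'test1'}
b = {'model1', 'test1'}
print(a.issubset(b))  # True  -- equal sets pass, which could introduce a cycle
print(a < b)          # False -- a proper subset rejects equality
```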
def compile(self, manifest: Manifest, write=True, add_test_edges=False) -> Graph:
def compile(self, manifest: Manifest, write=True) -> Graph:
self.initialize()
linker = Linker()
self.link_graph(linker, manifest, add_test_edges)
self.link_graph(linker, manifest)
stats = _generate_stats(manifest)
@@ -526,7 +520,7 @@ class Compiler:
if (not node.extra_ctes_injected or
node.resource_type == NodeType.Snapshot):
return node
fire_event(WritingInjectedSQLForNode(unique_id=node.unique_id))
logger.debug(f'Writing injected SQL for node "{node.unique_id}"')
if node.compiled_sql:
node.compiled_path = node.write_node(

View File

@@ -1 +0,0 @@
# Config README

View File

@@ -15,8 +15,7 @@ from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.exceptions import RuntimeException
from dbt.exceptions import validator_error_message
from dbt.events.types import MissingProfileTarget
from dbt.events.functions import fire_event
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import coerce_dict_str
from .renderer import ProfileRenderer
@@ -92,7 +91,6 @@ class Profile(HasCredentials):
user_config: UserConfig
threads: int
credentials: Credentials
profile_env_vars: Dict[str, Any]
def __init__(
self,
@@ -110,7 +108,6 @@ class Profile(HasCredentials):
self.user_config = user_config
self.threads = threads
self.credentials = credentials
self.profile_env_vars = {} # never available on init
def to_profile_info(
self, serialize_credentials: bool = False
@@ -294,7 +291,10 @@ class Profile(HasCredentials):
target_name = renderer.render_value(raw_profile['target'])
else:
target_name = 'default'
fire_event(MissingProfileTarget(profile_name=profile_name, target_name=target_name))
logger.debug(
"target not specified in profile '{}', using '{}'"
.format(profile_name, target_name)
)
raw_profile_data = cls._get_profile_data(
raw_profile, profile_name, target_name

View File

@@ -45,7 +45,7 @@ INVALID_VERSION_ERROR = """\
This version of dbt is not supported with the '{package}' package.
Installed version of dbt: {installed}
Required version of dbt for '{package}': {version_spec}
Check for a different version of the '{package}' package, or run dbt again with \
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""
@@ -54,7 +54,7 @@ IMPOSSIBLE_VERSION_ERROR = """\
The package version requirement can never be satisfied for the '{package}'
package.
Required versions of dbt for '{package}': {version_spec}
Check for a different version of the '{package}' package, or run dbt again with \
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""
@@ -284,7 +284,6 @@ class PartialProject(RenderComponents):
selectors_dict=rendered_selectors,
)
# Called by 'collect_parts' in RuntimeConfig
def render(self, renderer: DbtProjectYamlRenderer) -> 'Project':
try:
rendered = self.get_rendered(renderer)
@@ -305,7 +304,7 @@ class PartialProject(RenderComponents):
)
raise DbtProjectError(msg.format(deprecated_path=deprecated_path,
exp_path=exp_path))
deprecations.warn(f'project-config-{deprecated_path}',
deprecations.warn('project_config_path',
deprecated_path=deprecated_path,
exp_path=exp_path)
@@ -398,8 +397,6 @@ class PartialProject(RenderComponents):
vars_dict = cfg.vars
vars_value = VarProvider(vars_dict)
# There will never be any project_env_vars when it's first created
project_env_vars: Dict[str, Any] = {}
on_run_start: List[str] = value_or(cfg.on_run_start, [])
on_run_end: List[str] = value_or(cfg.on_run_end, [])
@@ -447,7 +444,6 @@ class PartialProject(RenderComponents):
vars=vars_value,
config_version=cfg.config_version,
unrendered=unrendered,
project_env_vars=project_env_vars,
)
# sanity check - this means an internal issue
project.validate()
@@ -560,7 +556,6 @@ class Project:
query_comment: QueryComment
config_version: int
unrendered: RenderComponents
project_env_vars: Dict[str, Any]
@property
def all_source_paths(self) -> List[str]:
@@ -569,13 +564,6 @@ class Project:
self.analysis_paths, self.macro_paths
)
@property
def generic_test_paths(self):
generic_test_paths = []
for test_path in self.test_paths:
generic_test_paths.append(os.path.join(test_path, 'generic'))
return generic_test_paths
def __str__(self):
cfg = self.to_project_config(with_packages=True)
return str(cfg)
@@ -650,6 +638,26 @@ class Project:
verify_version=verify_version,
)
@classmethod
def render_from_dict(
cls,
project_root: str,
project_dict: Dict[str, Any],
packages_dict: Dict[str, Any],
selectors_dict: Dict[str, Any],
renderer: DbtProjectYamlRenderer,
*,
verify_version: bool = False
) -> 'Project':
partial = PartialProject.from_dicts(
project_root=project_root,
project_dict=project_dict,
packages_dict=packages_dict,
selectors_dict=selectors_dict,
verify_version=verify_version,
)
return partial.render(renderer)
@classmethod
def from_project_root(
cls,

View File

@@ -1,14 +1,12 @@
from typing import Dict, Any, Tuple, Optional, Union, Callable
from dbt.clients.jinja import get_rendered, catch_jinja
from dbt.context.target import TargetContext
from dbt.context.secret import SecretContext
from dbt.context.base import BaseContext
from dbt.contracts.connection import HasCredentials
from dbt.exceptions import (
DbtProjectError, CompilationException, RecursionException
)
from dbt.utils import deep_map_render
from dbt.node_types import NodeType
from dbt.utils import deep_map
Keypath = Tuple[Union[str, int], ...]
@@ -49,7 +47,7 @@ class BaseRenderer:
self, data: Dict[str, Any]
) -> Dict[str, Any]:
try:
return deep_map_render(self.render_entry, data)
return deep_map(self.render_entry, data)
except RecursionException:
raise DbtProjectError(
f'Cycle detected: {self.name} input has a reference to itself',
@@ -101,23 +99,6 @@ class ProjectPostprocessor(Dict[Keypath, Callable[[Any], Any]]):
class DbtProjectYamlRenderer(BaseRenderer):
_KEYPATH_HANDLERS = ProjectPostprocessor()
def __init__(
self, profile: Optional[HasCredentials] = None,
cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars. This is almost always
# a TargetContext, but in the debug task we want a project
# even when we don't have a profile.
if cli_vars is None:
cli_vars = {}
if profile:
self.ctx_obj = TargetContext(profile, cli_vars)
else:
self.ctx_obj = BaseContext(cli_vars) # type:ignore
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
'Project config'
@@ -176,36 +157,82 @@ class DbtProjectYamlRenderer(BaseRenderer):
return True
class ProfileRenderer(BaseRenderer):
@property
def name(self):
'Profile'
class SchemaYamlRenderer(BaseRenderer):
DOCUMENTABLE_NODES = frozenset(
n.pluralize() for n in NodeType.documentable()
)
@property
def name(self):
return 'Rendering yaml'
def _is_norender_key(self, keypath: Keypath) -> bool:
"""
models:
- name: blah
- description: blah
tests: ...
- columns:
- name:
- description: blah
tests: ...
Return True if it's tests or description - those aren't rendered
"""
if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
return True
if (
len(keypath) >= 4 and
keypath[1] == 'columns' and
keypath[3] in ('tests', 'description')
):
return True
return False
# don't render descriptions or test keyword arguments
def should_render_keypath(self, keypath: Keypath) -> bool:
if len(keypath) < 2:
return True
if keypath[0] not in self.DOCUMENTABLE_NODES:
return True
if len(keypath) < 3:
return True
if keypath[0] == NodeType.Source.pluralize():
if keypath[2] == 'description':
return False
if keypath[2] == 'tables':
if self._is_norender_key(keypath[3:]):
return False
elif keypath[0] == NodeType.Macro.pluralize():
if keypath[2] == 'arguments':
if self._is_norender_key(keypath[3:]):
return False
elif self._is_norender_key(keypath[1:]):
return False
else: # keypath[0] in self.DOCUMENTABLE_NODES:
if self._is_norender_key(keypath[1:]):
return False
return True
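To see what these keypath rules do in practice, here is `_is_norender_key` lifted out as a free function with a few probes; `should_render_keypath` strips the leading section name (e.g. `models`) before delegating, so the probes start at the list index:

```python
def is_norender_key(keypath) -> bool:
    # descriptions and test keyword arguments are left unrendered
    if len(keypath) >= 2 and keypath[1] in ('tests', 'description'):
        return True
    if (len(keypath) >= 4 and keypath[1] == 'columns'
            and keypath[3] in ('tests', 'description')):
        return True
    return False

print(is_norender_key((0, 'description')))          # True: model descriptions stay raw
print(is_norender_key((0, 'columns', 0, 'tests')))  # True: column test kwargs stay raw
print(is_norender_key((0, 'name')))                 # False: names go through Jinja
```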
class PackageRenderer(BaseRenderer):
@property
def name(self):
return 'Packages config'
class SelectorRenderer(BaseRenderer):
@property
def name(self):
return 'Selector config'
class SecretRenderer(BaseRenderer):
def __init__(
self, cli_vars: Optional[Dict[str, Any]] = None
) -> None:
# Generate contexts here because we want to save the context
# object in order to retrieve the env_vars.
if cli_vars is None:
cli_vars = {}
self.ctx_obj = SecretContext(cli_vars)
context = self.ctx_obj.to_dict()
super().__init__(context)
@property
def name(self):
return 'Secret'
class ProfileRenderer(SecretRenderer):
@property
def name(self):
return 'Profile'
class PackageRenderer(SecretRenderer):
@property
def name(self):
return 'Packages config'

View File

@@ -1,7 +1,7 @@
import itertools
import os
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import dataclass, fields
from pathlib import Path
from typing import (
Dict, Any, Optional, Mapping, Iterator, Iterable, Tuple, List, MutableSet,
@@ -13,17 +13,21 @@ from .project import Project
from .renderer import DbtProjectYamlRenderer, ProfileRenderer
from .utils import parse_cli_vars
from dbt import flags
from dbt import tracking
from dbt.adapters.factory import get_relation_class_by_name, get_include_paths
from dbt.helper_types import FQNPath, PathSet
from dbt.config.profile import read_user_config
from dbt.context.base import generate_base_context
from dbt.context.target import generate_target_context
from dbt.contracts.connection import AdapterRequiredConfig, Credentials
from dbt.contracts.graph.manifest import ManifestMetadata
from dbt.contracts.relation import ComponentName
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.ui import warning_tag
from dbt.contracts.project import Configuration, UserConfig
from dbt.exceptions import (
RuntimeException,
DbtProfileError,
DbtProjectError,
validator_error_message,
warn_or_error,
@@ -56,7 +60,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def __post_init__(self):
self.validate()
# Called by 'new_project' and 'from_args'
@classmethod
def from_parts(
cls,
@@ -113,8 +116,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name=profile.profile_name,
target_name=profile.target_name,
user_config=profile.user_config,
@@ -125,7 +126,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
dependencies=dependencies,
)
# Called by 'load_projects' in this class
def new_project(self, project_root: str) -> 'RuntimeConfig':
"""Given a new project root, read in its project dictionary, supply the
existing project's profile info, and create a new project file.
@@ -140,7 +140,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
profile.validate()
# load the new project and its packages. Don't pass cli variables.
renderer = DbtProjectYamlRenderer(profile)
renderer = DbtProjectYamlRenderer(generate_target_context(profile, {}))
project = Project.from_project_root(
project_root,
@@ -148,14 +148,14 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
verify_version=bool(flags.VERSION_CHECK),
)
runtime_config = self.from_parts(
cfg = self.from_parts(
project=project,
profile=profile,
args=deepcopy(self.args),
)
# force our quoting back onto the new project.
runtime_config.quoting = deepcopy(self.quoting)
return runtime_config
cfg.quoting = deepcopy(self.quoting)
return cfg
def serialize(self) -> Dict[str, Any]:
"""Serialize the full configuration to a single dictionary. For any
@@ -188,7 +188,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
profile_renderer: ProfileRenderer,
profile_name: Optional[str],
) -> Profile:
return Profile.render_from_args(
args, profile_renderer, profile_name
)
@@ -206,26 +205,21 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
)
# build the profile using the base renderer and the one fact we know
# Note: only the named profile section is rendered. The rest of the
# profile is ignored.
cli_vars: Dict[str, Any] = parse_cli_vars(getattr(args, 'vars', '{}'))
profile_renderer = ProfileRenderer(cli_vars)
profile_renderer = ProfileRenderer(generate_base_context(cli_vars))
profile_name = partial.render_profile_name(profile_renderer)
profile = cls._get_rendered_profile(
args, profile_renderer, profile_name
)
# Save env_vars encountered in rendering for partial parsing
profile.profile_env_vars = profile_renderer.ctx_obj.env_vars
# get a new renderer using our target information and render the
# project
project_renderer = DbtProjectYamlRenderer(profile, cli_vars)
ctx = generate_target_context(profile, cli_vars)
project_renderer = DbtProjectYamlRenderer(ctx)
project = partial.render(project_renderer)
# Save env_vars encountered in rendering for partial parsing
project.project_env_vars = project_renderer.ctx_obj.env_vars
return (project, profile)
# Called in main.py, lib.py, task/base.py
@classmethod
def from_args(cls, args: Any) -> 'RuntimeConfig':
"""Given arguments, read in dbt_project.yml from the current directory,
@@ -259,7 +253,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
) -> PathSet:
for key, value in config.items():
if isinstance(value, dict) and not key.startswith('+'):
self._get_config_paths(value, path + (key,), paths)
self._get_v2_config_paths(value, path + (key,), paths)
else:
paths.add(path)
return frozenset(paths)
@@ -366,7 +360,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
def clear_dependencies(self):
self.dependencies = None
# Called by 'load_dependencies' in this class
def load_projects(
self, paths: Iterable[Path]
) -> Iterator[Tuple[str, 'RuntimeConfig']]:
@@ -410,12 +403,21 @@ class UnsetCredentials(Credentials):
return ()
# This is used by UnsetProfileConfig, for commands which do
# not require a profile, i.e. dbt deps and clean
class UnsetConfig(UserConfig):
def __getattribute__(self, name):
if name in {f.name for f in fields(UserConfig)}:
raise AttributeError(
f"'UnsetConfig' object has no attribute {name}"
)
def __post_serialize__(self, dct):
return {}
class UnsetProfile(Profile):
def __init__(self):
self.credentials = UnsetCredentials()
self.user_config = UserConfig() # This will be read in _get_rendered_profile
self.user_config = UnsetConfig()
self.profile_name = ''
self.target_name = ''
self.threads = -1
@@ -432,8 +434,6 @@ class UnsetProfile(Profile):
return Profile.__getattribute__(self, name)
# This class is used by the dbt deps and clean commands, because they don't
# require a functioning profile.
@dataclass
class UnsetProfileConfig(RuntimeConfig):
"""This class acts a lot _like_ a RuntimeConfig, except if your profile is
@@ -512,11 +512,9 @@ class UnsetProfileConfig(RuntimeConfig):
vars=project.vars,
config_version=project.config_version,
unrendered=project.unrendered,
project_env_vars=project.project_env_vars,
profile_env_vars=profile.profile_env_vars,
profile_name='',
target_name='',
user_config=UserConfig(),
user_config=UnsetConfig(),
threads=getattr(args, 'threads', 1),
credentials=UnsetCredentials(),
args=args,
@@ -531,12 +529,22 @@ class UnsetProfileConfig(RuntimeConfig):
profile_renderer: ProfileRenderer,
profile_name: Optional[str],
) -> Profile:
profile = UnsetProfile()
# The profile (for warehouse connection) is not needed, but we want
# to get the UserConfig, which is also in profiles.yml
user_config = read_user_config(flags.PROFILES_DIR)
profile.user_config = user_config
try:
profile = Profile.render_from_args(
args, profile_renderer, profile_name
)
except (DbtProjectError, DbtProfileError) as exc:
logger.debug(
'Profile not loaded due to error: {}', exc, exc_info=True
)
logger.info(
'No profile "{}" found, continuing with no target',
profile_name
)
# return the poisoned form
profile = UnsetProfile()
# disable anonymous usage statistics
tracking.disable_tracking()
return profile
@classmethod
@@ -551,6 +559,9 @@ class UnsetProfileConfig(RuntimeConfig):
:raises ValidationException: If the cli variables are invalid.
"""
project, profile = cls.collect_parts(args)
if not isinstance(profile, UnsetProfile):
# if it's a real profile, return a real config
cls = RuntimeConfig
return cls.from_parts(
project=project,

View File

@@ -1,9 +1,8 @@
from typing import Dict, Any
from dbt.clients import yaml_helper
from dbt.events.functions import fire_event
from dbt.exceptions import raise_compiler_error, ValidationException
from dbt.events.types import InvalidVarsYAML
from dbt.logger import GLOBAL_LOGGER as logger
def parse_cli_vars(var_string: str) -> Dict[str, Any]:
@@ -18,5 +17,7 @@ def parse_cli_vars(var_string: str) -> Dict[str, Any]:
"The --vars argument must be a YAML dictionary, but was "
"of type '{}'".format(type_name))
except ValidationException:
fire_event(InvalidVarsYAML())
logger.error(
"The YAML provided in the --vars argument is not valid.\n"
)
raise
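For reference, --vars accepts a YAML (or JSON) dictionary string. A minimal standalone sketch of the check being changed here, using PyYAML directly rather than dbt's yaml_helper:

import yaml  # PyYAML

def parse_cli_vars_sketch(var_string: str) -> dict:
    parsed = yaml.safe_load(var_string)  # e.g. '{env: prod, run_date: 2021-11-05}'
    if not isinstance(parsed, dict):
        raise ValueError(
            "The --vars argument must be a YAML dictionary, "
            f"but was of type '{type(parsed).__name__}'"
        )
    return parsed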

View File

@@ -1 +0,0 @@
# Contexts and Jinja rendering

View File

@@ -6,16 +6,13 @@ from typing import (
from dbt import flags
from dbt import tracking
from dbt.clients.jinja import get_rendered
from dbt.clients.jinja import undefined_error, get_rendered
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, safe_load, SafeLoader, Loader, Dumper
)
from dbt.contracts.graph.compiled import CompiledResource
from dbt.exceptions import (
raise_compiler_error, MacroReturn, raise_parsing_error, disallow_secret_env_var
)
from dbt.logger import SECRET_ENV_PREFIX
from dbt.events.functions import fire_event, get_invocation_id
from dbt.exceptions import raise_compiler_error, MacroReturn
from dbt.events.functions import fire_event
from dbt.events.types import MacroEventInfo, MacroEventDebug
from dbt.version import __version__ as dbt_version
@@ -25,39 +22,6 @@ import pytz
import datetime
import re
# Contexts in dbt Core
# Contexts are used for Jinja rendering. They include context methods,
# executable macros, and various settings that are available in Jinja.
#
# Different contexts are used in different places because we allow access
# to different methods and data in different places. Executable SQL, for
# example, includes the available macros and the model, while Jinja in
# yaml files is more limited.
#
# The context that is passed to Jinja is always in a dictionary format,
# not an actual class, so a 'to_dict()' is executed on a context class
# before it is used for rendering.
#
# Each context has a generate_<name>_context function to create the context.
# ProviderContext subclasses have different generate functions for
# parsing and for execution.
#
# Context class hierarchy
#
# BaseContext -- core/dbt/context/base.py
# SecretContext -- core/dbt/context/secret.py
# TargetContext -- core/dbt/context/target.py
# ConfiguredContext -- core/dbt/context/configured.py
# SchemaYamlContext -- core/dbt/context/configured.py
# DocsRuntimeContext -- core/dbt/context/configured.py
# MacroResolvingContext -- core/dbt/context/configured.py
# ManifestContext -- core/dbt/context/manifest.py
# QueryHeaderContext -- core/dbt/context/manifest.py
# ProviderContext -- core/dbt/context/provider.py
# MacroContext -- core/dbt/context/provider.py
# ModelContext -- core/dbt/context/provider.py
# TestContext -- core/dbt/context/provider.py
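A rough illustration of the pattern this comment describes, with hypothetical names rather than dbt's real classes: each context gathers its members into a plain dict, and Jinja renders against that dict.

from typing import Any, Dict

class SketchContext:
    # hypothetical mini-context following the scheme above
    def __init__(self, cli_vars: Dict[str, Any]):
        self.cli_vars = cli_vars

    def to_dict(self) -> Dict[str, Any]:
        # Jinja always receives a plain dict, never the context object itself
        return {'var': self.cli_vars.get, 'dbt_version': '1.0.0'}

def generate_sketch_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
    return SketchContext(cli_vars).to_dict()

# generate_sketch_context({'env': 'prod'})['var']('env')  ->  'prod'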
def get_pytz_module_context() -> Dict[str, Any]:
context_exports = pytz.__all__ # type: ignore
@@ -197,11 +161,9 @@ class Var:
class BaseContext(metaclass=ContextMeta):
# subclass is TargetContext
def __init__(self, cli_vars):
self._ctx = {}
self.cli_vars = cli_vars
self.env_vars = {}
def generate_builtins(self):
builtins: Dict[str, Any] = {}
@@ -310,26 +272,20 @@ class BaseContext(metaclass=ContextMeta):
return Var(self._ctx, self.cli_vars)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
@staticmethod
def env_var(var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
return os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
self.env_vars[var] = return_value
return return_value
return default
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
undefined_error(msg)
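Both variants back dbt's env_var() Jinja builtin, whose standard usage is:

# {{ env_var('DBT_USER') }}              -> value of $DBT_USER, or an error if unset
# {{ env_var('DBT_USER', 'fallback') }}  -> value of $DBT_USER, or 'fallback'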
if os.environ.get('DBT_MACRO_DEBUGGING'):
@contextmember
@@ -488,9 +444,9 @@ class BaseContext(metaclass=ContextMeta):
{% endmacro %}"
"""
if info:
fire_event(MacroEventInfo(msg=msg))
fire_event(MacroEventInfo(msg))
else:
fire_event(MacroEventDebug(msg=msg))
fire_event(MacroEventDebug(msg))
return ''
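This is the log() member exposed to macros; standard usage from Jinja:

# {{ log("publishing table", info=True) }}  -> MacroEventInfo, shown at INFO level
# {{ log("row count: " ~ n) }}              -> MacroEventDebug, debug logs only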
@contextproperty
@@ -526,7 +482,10 @@ class BaseContext(metaclass=ContextMeta):
"""invocation_id outputs a UUID generated for this dbt run (useful for
auditing)
"""
return get_invocation_id()
if tracking.active_user is not None:
return tracking.active_user.invocation_id
else:
return None
@contextproperty
def modules(self) -> Dict[str, Any]:

View File

@@ -1,18 +1,14 @@
import os
from typing import Any, Dict, Optional
from typing import Any, Dict
from dbt.contracts.connection import AdapterRequiredConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.node_types import NodeType
from dbt.utils import MultiDict
from dbt.context.base import contextproperty, contextmember, Var
from dbt.context.base import contextproperty, Var
from dbt.context.target import TargetContext
from dbt.exceptions import raise_parsing_error, disallow_secret_env_var
class ConfiguredContext(TargetContext):
# subclasses are SchemaYamlContext, MacroResolvingContext, ManifestContext
config: AdapterRequiredConfig
def __init__(
@@ -67,18 +63,10 @@ class ConfiguredVar(Var):
return self.get_missing_var(var_name)
class SchemaYamlVars():
def __init__(self):
self.env_vars = {}
self.vars = {}
class SchemaYamlContext(ConfiguredContext):
# subclass is DocsRuntimeContext
def __init__(self, config, project_name: str, schema_yaml_vars: Optional[SchemaYamlVars]):
def __init__(self, config, project_name: str):
super().__init__(config)
self._project_name = project_name
self.schema_yaml_vars = schema_yaml_vars
@contextproperty
def var(self) -> ConfiguredVar:
@@ -86,24 +74,6 @@ class SchemaYamlContext(ConfiguredContext):
self._ctx, self.config, self._project_name
)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
if self.schema_yaml_vars:
self.schema_yaml_vars.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
class MacroResolvingContext(ConfiguredContext):
def __init__(self, config):
@@ -116,10 +86,10 @@ class MacroResolvingContext(ConfiguredContext):
)
def generate_schema_yml_context(
config: AdapterRequiredConfig, project_name: str, schema_yaml_vars: SchemaYamlVars = None
def generate_schema_yml(
config: AdapterRequiredConfig, project_name: str
) -> Dict[str, Any]:
ctx = SchemaYamlContext(config, project_name, schema_yaml_vars)
ctx = SchemaYamlContext(config, project_name)
return ctx.to_dict()

View File

@@ -23,7 +23,7 @@ class DocsRuntimeContext(SchemaYamlContext):
manifest: Manifest,
current_project: str,
) -> None:
super().__init__(config, current_project, None)
super().__init__(config, current_project)
self.node = node
self.manifest = manifest
@@ -75,7 +75,7 @@ class DocsRuntimeContext(SchemaYamlContext):
return target_doc.block_contents
def generate_runtime_docs_context(
def generate_runtime_docs(
config: RuntimeConfig,
target: Any,
manifest: Manifest,

View File

@@ -17,7 +17,6 @@ class ManifestContext(ConfiguredContext):
The given macros can override any previous context values, which will be
available as if they were accessed relative to the package name.
"""
# subclasses are QueryHeaderContext and ProviderContext
def __init__(
self,
config: AdapterRequiredConfig,

View File

@@ -16,7 +16,6 @@ from dbt.config import RuntimeConfig, Project
from .base import contextmember, contextproperty, Var
from .configured import FQNLookup
from .context_config import ContextConfig
from dbt.logger import SECRET_ENV_PREFIX
from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace
from .macros import MacroNamespaceBuilder, MacroNamespace
from .manifest import ManifestContext
@@ -32,13 +31,11 @@ from dbt.contracts.graph.compiled import (
from dbt.contracts.graph.parsed import (
ParsedMacro,
ParsedExposure,
ParsedMetric,
ParsedSeedNode,
ParsedSourceDefinition,
)
from dbt.exceptions import (
CompilationException,
ParsingException,
InternalException,
ValidationException,
RuntimeException,
@@ -50,8 +47,6 @@ from dbt.exceptions import (
ref_bad_context,
source_target_not_found,
wrapped_exports,
raise_parsing_error,
disallow_secret_env_var,
)
from dbt.config import IsFQNResource
from dbt.node_types import NodeType
@@ -327,7 +322,7 @@ class ParseConfigObject(Config):
def require(self, name, validator=None):
return ''
def get(self, name, default=None, validator=None):
def get(self, name, validator=None, default=None):
return ''
def persist_relation_docs(self) -> bool:
@@ -371,7 +366,7 @@ class RuntimeConfigObject(Config):
return to_return
def get(self, name, default=None, validator=None):
def get(self, name, validator=None, default=None):
to_return = self._lookup(name, default)
if validator is not None and default is not None:
@@ -640,7 +635,6 @@ T = TypeVar('T')
# Base context collection, used for parsing configs.
class ProviderContext(ManifestContext):
# subclasses are MacroContext, ModelContext, TestContext
def __init__(
self,
model,
@@ -1166,37 +1160,6 @@ class ProviderContext(ManifestContext):
)
raise CompilationException(msg)
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
"""
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file.
# If this is compiling, do not save because it's irrelevant to parsing.
if self.model and not hasattr(self.model, 'compiled'):
self.manifest.env_vars[var] = return_value
# hooks come from dbt_project.yml which doesn't have a real file_id
if self.model.file_id in self.manifest.files:
source_file = self.manifest.files[self.model.file_id]
# Schema files should never get here
if source_file.parse_file_type != 'schema':
source_file.env_vars.append(var)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
class MacroContext(ProviderContext):
"""Internally, macros can be executed like nodes, with some restrictions:
@@ -1298,7 +1261,7 @@ class ModelContext(ProviderContext):
# This is called by '_context_for', used in 'render_with_context'
def generate_parser_model_context(
def generate_parser_model(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1315,7 +1278,7 @@ def generate_parser_model_context(
return ctx.to_dict()
def generate_generate_name_macro_context(
def generate_generate_component_name_macro(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1326,7 +1289,7 @@ def generate_generate_name_macro_context(
return ctx.to_dict()
def generate_runtime_model_context(
def generate_runtime_model(
model: ManifestNode,
config: RuntimeConfig,
manifest: Manifest,
@@ -1337,7 +1300,7 @@ def generate_runtime_model_context(
return ctx.to_dict()
def generate_runtime_macro_context(
def generate_runtime_macro(
macro: ParsedMacro,
config: RuntimeConfig,
manifest: Manifest,
@@ -1391,44 +1354,6 @@ def generate_parse_exposure(
}
class MetricRefResolver(BaseResolver):
def __call__(self, *args) -> str:
package = None
if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
ref_invalid_args(self.model, args)
self.validate_args(name, package)
self.model.refs.append(list(args))
return ''
def validate_args(self, name, package):
if not isinstance(name, str):
raise ParsingException(
f'In a metrics section in {self.model.original_file_path} '
f'the name argument to ref() must be a string'
)
def generate_parse_metrics(
metric: ParsedMetric,
config: RuntimeConfig,
manifest: Manifest,
package_name: str,
) -> Dict[str, Any]:
project = config.load_dependencies()[package_name]
return {
'ref': MetricRefResolver(
None,
metric,
project,
manifest,
),
}
# This class is currently used by the schema parser in order
# to limit the number of macros in the context by using
# the TestMacroNamespace
@@ -1463,13 +1388,8 @@ class TestContext(ProviderContext):
# 'depends_on.macros' by using the TestMacroNamespace
def _build_test_namespace(self):
depends_on_macros = []
# all generic tests use a macro named 'get_where_subquery' to wrap 'model' arg
# see generic_test_builders.build_model_str
get_where_subquery = self.macro_resolver.macros_by_name.get('get_where_subquery')
if get_where_subquery:
depends_on_macros.append(get_where_subquery.unique_id)
if self.model.depends_on and self.model.depends_on.macros:
depends_on_macros.extend(self.model.depends_on.macros)
depends_on_macros = self.model.depends_on.macros
lookup_macros = depends_on_macros.copy()
for macro_unique_id in lookup_macros:
lookup_macro = self.macro_resolver.macros.get(macro_unique_id)
@@ -1482,30 +1402,6 @@ class TestContext(ProviderContext):
)
self.namespace = macro_namespace
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
return_value = None
if var.startswith(SECRET_ENV_PREFIX):
disallow_secret_env_var(var)
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
# Save the env_var value in the manifest and the var name in the source_file
if self.model:
self.manifest.env_vars[var] = return_value
# the "model" should only be test nodes, but just in case, check
if self.model.resource_type == NodeType.Test and self.model.file_key_name:
source_file = self.manifest.files[self.model.file_id]
(yaml_key, name) = self.model.file_key_name.split('.')
source_file.add_env_var(var, yaml_key, name)
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
def generate_test_context(
model: ManifestNode,

View File

@@ -1,40 +0,0 @@
import os
from typing import Any, Dict, Optional
from .base import BaseContext, contextmember
from dbt.exceptions import raise_parsing_error
class SecretContext(BaseContext):
"""This context is used in profiles.yml + packages.yml. It can render secret
env vars that aren't usable elsewhere"""
@contextmember
def env_var(self, var: str, default: Optional[str] = None) -> str:
"""The env_var() function. Return the environment variable named 'var'.
If there is no such environment variable set, return the default.
If the default is None, raise an exception for an undefined variable.
In this context *only*, env_var will return the actual values of
env vars prefixed with DBT_ENV_SECRET_
"""
return_value = None
if var in os.environ:
return_value = os.environ[var]
elif default is not None:
return_value = default
if return_value is not None:
self.env_vars[var] = return_value
return return_value
else:
msg = f"Env var required but not provided: '{var}'"
raise_parsing_error(msg)
def generate_secret_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
ctx = SecretContext(cli_vars)
# This is not a Mashumaro to_dict call
return ctx.to_dict()
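For context, SECRET_ENV_PREFIX is 'DBT_ENV_SECRET_' in dbt, and only this context resolves such variables:

import os

os.environ['DBT_ENV_SECRET_GIT_TOKEN'] = 'shh'  # the DBT_ENV_SECRET_ prefix
# In profiles.yml / packages.yml (rendered with SecretContext):
#   {{ env_var('DBT_ENV_SECRET_GIT_TOKEN') }}  resolves to 'shh'
# In every other context, env_var() refuses via disallow_secret_env_var().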

View File

@@ -8,7 +8,6 @@ from dbt.context.base import (
class TargetContext(BaseContext):
# subclass is ConfiguredContext
def __init__(self, config: HasCredentials, cli_vars: Dict[str, Any]):
super().__init__(cli_vars=cli_vars)
self.config = config

View File

@@ -1 +0,0 @@
# Contracts README

View File

@@ -7,8 +7,7 @@ from typing import (
)
from dbt.exceptions import InternalException
from dbt.utils import translate_aliases
from dbt.events.functions import fire_event
from dbt.events.types import NewConnectionOpening
from dbt.logger import GLOBAL_LOGGER as logger
from typing_extensions import Protocol
from dbt.dataclass_schema import (
dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
@@ -102,7 +101,10 @@ class LazyHandle:
self.opener = opener
def resolve(self, connection: Connection) -> Connection:
fire_event(NewConnectionOpening(connection_state=connection.state))
logger.debug(
'Opening a new connection, currently in state {}'
.format(connection.state)
)
return self.opener(connection)

View File

@@ -190,7 +190,6 @@ class SourceFile(BaseSourceFile):
nodes: List[str] = field(default_factory=list)
docs: List[str] = field(default_factory=list)
macros: List[str] = field(default_factory=list)
env_vars: List[str] = field(default_factory=list)
@classmethod
def big_seed(cls, path: FilePath) -> 'SourceFile':
@@ -223,7 +222,6 @@ class SchemaSourceFile(BaseSourceFile):
tests: Dict[str, Any] = field(default_factory=dict)
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.
@@ -232,7 +230,6 @@ class SchemaSourceFile(BaseSourceFile):
# Patches are only against external sources. Sources can be
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None
@@ -255,7 +252,7 @@ class SchemaSourceFile(BaseSourceFile):
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
# Remove partial parsing specific data
for key in ('pp_test_index', 'pp_dict'):
for key in ('pp_files', 'pp_test_index', 'pp_dict'):
if key in dct:
del dct[key]
return dct
@@ -302,21 +299,5 @@ class SchemaSourceFile(BaseSourceFile):
test_ids.extend(self.tests[key][name])
return test_ids
def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
if name not in self.env_vars[yaml_key]:
self.env_vars[yaml_key][name] = []
if var not in self.env_vars[yaml_key][name]:
self.env_vars[yaml_key][name].append(var)
def delete_from_env_vars(self, yaml_key, name):
# We delete all vars for this yaml_key/name because the
# entry has been scheduled for reparsing.
if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
del self.env_vars[yaml_key][name]
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]
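A standalone sketch of the nested mapping these two helpers maintain (yaml_key -> name -> list of env var names), mirroring the logic above:

env_vars: dict = {}

def add_env_var_sketch(var: str, yaml_key: str, name: str) -> None:
    bucket = env_vars.setdefault(yaml_key, {}).setdefault(name, [])
    if var not in bucket:
        bucket.append(var)

add_env_var_sketch('DBT_SCHEMA', 'models', 'customers')
add_env_var_sketch('DBT_OWNER', 'models', 'customers')
assert env_vars == {'models': {'customers': ['DBT_SCHEMA', 'DBT_OWNER']}}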
AnySourceFile = Union[SchemaSourceFile, SourceFile]

View File

@@ -6,10 +6,8 @@ from dbt.contracts.graph.parsed import (
ParsedHookNode,
ParsedModelNode,
ParsedExposure,
ParsedMetric,
ParsedResource,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -83,17 +81,11 @@ class CompiledModelNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class CompiledRPCNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class CompiledSqlNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
@dataclass
class CompiledSeedNode(CompiledNode):
# keep this in sync with ParsedSeedNode!
@@ -127,7 +119,6 @@ class CompiledGenericTestNode(CompiledNode, HasTestMetadata):
# keep this in sync with ParsedGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type:ignore
@@ -151,7 +142,6 @@ PARSED_TYPES: Dict[Type[CompiledNode], Type[ParsedResource]] = {
CompiledModelNode: ParsedModelNode,
CompiledHookNode: ParsedHookNode,
CompiledRPCNode: ParsedRPCNode,
CompiledSqlNode: ParsedSqlNode,
CompiledSeedNode: ParsedSeedNode,
CompiledSnapshotNode: ParsedSnapshotNode,
CompiledSingularTestNode: ParsedSingularTestNode,
@@ -164,7 +154,6 @@ COMPILED_TYPES: Dict[Type[ParsedResource], Type[CompiledNode]] = {
ParsedModelNode: CompiledModelNode,
ParsedHookNode: CompiledHookNode,
ParsedRPCNode: CompiledRPCNode,
ParsedSqlNode: CompiledSqlNode,
ParsedSeedNode: CompiledSeedNode,
ParsedSnapshotNode: CompiledSnapshotNode,
ParsedSingularTestNode: CompiledSingularTestNode,
@@ -200,7 +189,6 @@ NonSourceCompiledNode = Union[
CompiledModelNode,
CompiledHookNode,
CompiledRPCNode,
CompiledSqlNode,
CompiledGenericTestNode,
CompiledSeedNode,
CompiledSnapshotNode,
@@ -212,7 +200,6 @@ NonSourceParsedNode = Union[
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -233,10 +220,8 @@ CompileResultNode = Union[
ParsedSourceDefinition,
]
# anything that participates in the graph: sources, exposures, metrics,
# or manifest nodes
# anything that participates in the graph: sources, exposures, manifest nodes
GraphMemberNode = Union[
CompileResultNode,
ParsedExposure,
ParsedMetric,
]

View File

@@ -15,8 +15,8 @@ from dbt.contracts.graph.compiled import (
)
from dbt.contracts.graph.parsed import (
ParsedMacro, ParsedDocumentation,
ParsedSourceDefinition, ParsedExposure, ParsedMetric,
HasUniqueID, UnpatchedSourceDefinition, ManifestNodes
ParsedSourceDefinition, ParsedExposure, HasUniqueID,
UnpatchedSourceDefinition, ManifestNodes
)
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
@@ -29,8 +29,7 @@ from dbt.exceptions import (
raise_duplicate_resource_name, raise_compiler_error,
)
from dbt.helper_types import PathSet
from dbt.events.functions import fire_event
from dbt.events.types import MergedFromState
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt.ui import line_wrap_message
from dbt import flags
@@ -547,8 +546,6 @@ class ParsingInfo:
@dataclass
class ManifestStateCheck(dbtClassMixin):
vars_hash: FileHash = field(default_factory=FileHash.empty)
project_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_env_vars_hash: FileHash = field(default_factory=FileHash.empty)
profile_hash: FileHash = field(default_factory=FileHash.empty)
project_hashes: MutableMapping[str, FileHash] = field(default_factory=dict)
@@ -565,7 +562,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros: MutableMapping[str, ParsedMacro] = field(default_factory=dict)
docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
metrics: MutableMapping[str, ParsedMetric] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
@@ -573,7 +569,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
state_check: ManifestStateCheck = field(default_factory=ManifestStateCheck)
source_patches: MutableMapping[SourceKey, SourcePatch] = field(default_factory=dict)
disabled: MutableMapping[str, List[CompileResultNode]] = field(default_factory=dict)
env_vars: MutableMapping[str, str] = field(default_factory=dict)
_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None}
@@ -633,9 +628,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def update_exposure(self, new_exposure: ParsedExposure):
_update_into(self.exposures, new_exposure)
def update_metric(self, new_metric: ParsedMetric):
_update_into(self.metrics, new_metric)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
@@ -653,10 +645,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
k: v.to_dict(omit_none=False)
for k, v in self.exposures.items()
},
'metrics': {
k: v.to_dict(omit_none=False)
for k, v in self.metrics.items()
},
'nodes': {
k: v.to_dict(omit_none=False)
for k, v in self.nodes.items()
@@ -709,12 +697,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def get_resource_fqns(self) -> Mapping[str, PathSet]:
resource_fqns: Dict[str, Set[Tuple[str, ...]]] = {}
all_resources = chain(
self.exposures.values(),
self.nodes.values(),
self.sources.values(),
self.metrics.values()
)
all_resources = chain(self.exposures.values(), self.nodes.values(), self.sources.values())
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
if resource_type_plural not in resource_fqns:
@@ -742,7 +725,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros={k: _deepcopy(v) for k, v in self.macros.items()},
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
disabled={k: _deepcopy(v) for k, v in self.disabled.items()},
@@ -755,7 +737,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.nodes.values(),
self.sources.values(),
self.exposures.values(),
self.metrics.values(),
))
forward_edges, backward_edges = build_node_edges(edge_members)
self.child_map = forward_edges
@@ -777,7 +758,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
macros=self.macros,
docs=self.docs,
exposures=self.exposures,
metrics=self.metrics,
selectors=self.selectors,
metadata=self.metadata,
disabled=self.disabled,
@@ -797,8 +777,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return self.sources[unique_id]
elif unique_id in self.exposures:
return self.exposures[unique_id]
elif unique_id in self.metrics:
return self.metrics[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.InternalException(
@@ -959,7 +937,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
# log up to 5 items
sample = list(islice(merged, 5))
fire_event(MergedFromState(nbr_merged=len(merged), sample=sample))
logger.debug(
f'Merged {len(merged)} items from state (sample: {sample})'
)
# Methods that were formerly in ParseResult
@@ -1025,11 +1005,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)
def add_metric(self, source_file: SchemaSourceFile, metric: ParsedMetric):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
source_file.metrics.append(metric.unique_id)
def add_disabled_nofile(self, node: CompileResultNode):
# There can be multiple disabled nodes for the same unique_id
if node.unique_id in self.disabled:
@@ -1066,7 +1041,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.macros,
self.docs,
self.exposures,
self.metrics,
self.selectors,
self.files,
self.metadata,
@@ -1074,7 +1048,6 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.state_check,
self.source_patches,
self.disabled,
self.env_vars,
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
@@ -1097,7 +1070,7 @@ AnyManifest = Union[Manifest, MacroManifest]
@dataclass
@schema_version('manifest', 4)
@schema_version('manifest', 3)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=(
@@ -1124,11 +1097,6 @@ class WritableManifest(ArtifactMixin):
'The exposures defined in the dbt project and its dependencies'
))
)
metrics: Mapping[UniqueID, ParsedMetric] = field(
metadata=dict(description=(
'The metrics defined in the dbt project and its dependencies'
))
)
selectors: Mapping[UniqueID, Any] = field(
metadata=dict(description=(
'The selectors defined in selectors.yml'

View File

@@ -26,10 +26,11 @@ from dbt.contracts.graph.unparsed import (
UnparsedBaseNode, FreshnessThreshold, ExternalTable,
HasYamlMetadata, MacroArgument, UnparsedSourceDefinition,
UnparsedSourceTableDefinition, UnparsedColumn, TestDef,
ExposureOwner, ExposureType, MaturityType, MetricFilter
ExposureOwner, ExposureType, MaturityType
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.exceptions import warn_or_error
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt import flags
from dbt.node_types import NodeType
@@ -150,7 +151,7 @@ class ParsedNodeMixins(dbtClassMixin):
# Note: config should already be updated
self.patch_path: Optional[str] = patch.file_id
# update created_at so process_docs will run in partial parsing
self.created_at = time.time()
self.created_at = int(time.time())
self.description = patch.description
self.columns = patch.columns
self.meta = patch.meta
@@ -178,26 +179,7 @@ class ParsedNodeMandatory(
@dataclass
class NodeInfoMixin():
_event_status: Dict[str, Any] = field(default_factory=dict)
@property
def node_info(self):
node_info = {
"node_path": getattr(self, 'path', None),
"node_name": getattr(self, 'name', None),
"unique_id": getattr(self, 'unique_id', None),
"resource_type": str(getattr(self, 'resource_type', '')),
"materialized": self.config.get('materialized'),
"node_status": str(self._event_status.get('node_status')),
"node_started_at": self._event_status.get("started_at"),
"node_finished_at": self._event_status.get("finished_at")
}
return node_info
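The payload this property attaches to structured log events looks roughly like the following (values illustrative):

{
    'node_path': 'models/staging/stg_orders.sql',
    'node_name': 'stg_orders',
    'unique_id': 'model.jaffle_shop.stg_orders',
    'resource_type': 'model',
    'materialized': 'view',
    'node_status': 'started',
    'node_started_at': '2021-11-05T12:28:30',
    'node_finished_at': None,
}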
@dataclass
class ParsedNodeDefaults(NodeInfoMixin, ParsedNodeMandatory):
class ParsedNodeDefaults(ParsedNodeMandatory):
tags: List[str] = field(default_factory=list)
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
@@ -211,7 +193,7 @@ class ParsedNodeDefaults(NodeInfoMixin, ParsedNodeMandatory):
build_path: Optional[str] = None
deferred: bool = False
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
config_call_dict: Dict[str, Any] = field(default_factory=dict)
def write_node(self, target_path: str, subdirectory: str, payload: str):
@@ -242,8 +224,6 @@ class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):
def __post_serialize__(self, dct):
if 'config_call_dict' in dct:
del dct['config_call_dict']
if '_event_status' in dct:
del dct['_event_status']
return dct
@classmethod
@@ -260,8 +240,6 @@ class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins, SerializableType):
return ParsedSeedNode.from_dict(dct)
elif resource_type == 'rpc':
return ParsedRPCNode.from_dict(dct)
elif resource_type == 'sql':
return ParsedSqlNode.from_dict(dct)
elif resource_type == 'test':
if 'test_metadata' in dct:
return ParsedGenericTestNode.from_dict(dct)
@@ -363,17 +341,11 @@ class ParsedModelNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]})
# TODO: rm?
@dataclass
class ParsedRPCNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.RPCCall]})
@dataclass
class ParsedSqlNode(ParsedNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.SqlOperation]})
def same_seeds(first: ParsedNode, second: ParsedNode) -> bool:
# for seeds, we check the hashes. If the hashes are different types,
# no match. If the hashes are both the same 'path', log a warning and
@@ -430,9 +402,6 @@ class ParsedSeedNode(ParsedNode):
@dataclass
class TestMetadata(dbtClassMixin, Replaceable):
name: str
# kwargs are the args that are left in the test builder after
# removing configs. They are set from the test builder when
# the test node is created.
kwargs: Dict[str, Any] = field(default_factory=dict)
namespace: Optional[str] = None
@@ -449,17 +418,12 @@ class ParsedSingularTestNode(ParsedNode):
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
@property
def test_node_type(self):
return 'singular'
@dataclass
class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
# keep this in sync with CompiledGenericTestNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
file_key_name: Optional[str] = None
# Was not able to make mypy happy and keep the code working. We need to
# refactor the various configs.
config: TestConfig = field(default_factory=TestConfig) # type: ignore
@@ -474,10 +438,6 @@ class ParsedGenericTestNode(ParsedNode, HasTestMetadata):
True
)
@property
def test_node_type(self):
return 'generic'
@dataclass
class IntermediateSnapshotNode(ParsedNode):
@@ -533,12 +493,12 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
docs: Docs = field(default_factory=Docs)
patch_path: Optional[str] = None
arguments: List[MacroArgument] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
def patch(self, patch: ParsedMacroPatch):
self.patch_path: Optional[str] = patch.file_id
self.description = patch.description
self.created_at = time.time()
self.created_at = int(time.time())
self.meta = patch.meta
self.docs = patch.docs
self.arguments = patch.arguments
@@ -628,11 +588,12 @@ class UnpatchedSourceDefinition(UnparsedBaseNode, HasUniqueID, HasFqn):
@dataclass
class ParsedSourceMandatory(
class ParsedSourceDefinition(
UnparsedBaseNode,
HasUniqueID,
HasRelationMetadata,
HasFqn,
):
name: str
source_name: str
@@ -640,13 +601,6 @@ class ParsedSourceMandatory(
loader: str
identifier: str
resource_type: NodeType = field(metadata={'restrict': [NodeType.Source]})
@dataclass
class ParsedSourceDefinition(
NodeInfoMixin,
ParsedSourceMandatory
):
quoting: Quoting = field(default_factory=Quoting)
loaded_at_field: Optional[str] = None
freshness: Optional[FreshnessThreshold] = None
@@ -660,12 +614,7 @@ class ParsedSourceDefinition(
patch_path: Optional[Path] = None
unrendered_config: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
created_at: float = field(default_factory=lambda: time.time())
def __post_serialize__(self, dct):
if '_event_status' in dct:
del dct['_event_status']
return dct
created_at: int = field(default_factory=lambda: int(time.time()))
def same_database_representation(
self, other: 'ParsedSourceDefinition'
@@ -776,7 +725,7 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
created_at: int = field(default_factory=lambda: int(time.time()))
@property
def depends_on_nodes(self):
@@ -822,88 +771,12 @@ class ParsedExposure(UnparsedBaseNode, HasUniqueID, HasFqn):
)
@dataclass
class ParsedMetric(UnparsedBaseNode, HasUniqueID, HasFqn):
model: str
name: str
description: str
label: str
type: str
sql: Optional[str]
timestamp: Optional[str]
filters: List[MetricFilter]
time_grains: List[str]
dimensions: List[str]
resource_type: NodeType = NodeType.Metric
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
return self.depends_on.nodes
@property
def search_name(self):
return self.name
def same_model(self, old: 'ParsedMetric') -> bool:
return self.model == old.model
def same_dimensions(self, old: 'ParsedMetric') -> bool:
return self.dimensions == old.dimensions
def same_filters(self, old: 'ParsedMetric') -> bool:
return self.filters == old.filters
def same_description(self, old: 'ParsedMetric') -> bool:
return self.description == old.description
def same_label(self, old: 'ParsedMetric') -> bool:
return self.label == old.label
def same_type(self, old: 'ParsedMetric') -> bool:
return self.type == old.type
def same_sql(self, old: 'ParsedMetric') -> bool:
return self.sql == old.sql
def same_timestamp(self, old: 'ParsedMetric') -> bool:
return self.timestamp == old.timestamp
def same_time_grains(self, old: 'ParsedMetric') -> bool:
return self.time_grains == old.time_grains
def same_contents(self, old: Optional['ParsedMetric']) -> bool:
# existing when it didn't before is a change!
# metadata/tags changes are not "changes"
if old is None:
return True
return (
self.same_model(old) and
self.same_dimensions(old) and
self.same_filters(old) and
self.same_description(old) and
self.same_label(old) and
self.same_type(old) and
self.same_sql(old) and
self.same_timestamp(old) and
self.same_time_grains(old) and
True
)
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedSingularTestNode,
ParsedHookNode,
ParsedModelNode,
ParsedRPCNode,
ParsedSqlNode,
ParsedGenericTestNode,
ParsedSeedNode,
ParsedSnapshotNode,
@@ -915,6 +788,5 @@ ParsedResource = Union[
ParsedMacro,
ParsedNode,
ParsedExposure,
ParsedMetric,
ParsedSourceDefinition,
]

View File

@@ -60,7 +60,6 @@ class UnparsedNode(UnparsedBaseNode, HasSQL):
NodeType.Operation,
NodeType.Seed,
NodeType.RPCCall,
NodeType.SqlOperation,
]})
@property
@@ -168,25 +167,20 @@ class TimePeriod(StrEnum):
@dataclass
class Time(dbtClassMixin, Mergeable):
count: Optional[int] = None
period: Optional[TimePeriod] = None
class Time(dbtClassMixin, Replaceable):
count: int
period: TimePeriod
def exceeded(self, actual_age: float) -> bool:
if self.period is None or self.count is None:
return False
kwargs: Dict[str, int] = {self.period.plural(): self.count}
kwargs = {self.period.plural(): self.count}
difference = timedelta(**kwargs).total_seconds()
return actual_age > difference
def __bool__(self):
return self.count is not None and self.period is not None
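A worked example of exceeded() for a Time(count=12, period=hour) threshold, where period.plural() is assumed to yield 'hours':

from datetime import timedelta

count, period_plural = 12, 'hours'
threshold = timedelta(**{period_plural: count}).total_seconds()  # 43200.0
actual_age = 50_000.0  # seconds since the source was last loaded
assert actual_age > threshold  # -> the freshness threshold is exceeded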
@dataclass
class FreshnessThreshold(dbtClassMixin, Mergeable):
warn_after: Optional[Time] = field(default_factory=Time)
error_after: Optional[Time] = field(default_factory=Time)
warn_after: Optional[Time] = None
error_after: Optional[Time] = None
filter: Optional[str] = None
def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus":
@@ -199,7 +193,7 @@ class FreshnessThreshold(dbtClassMixin, Mergeable):
return FreshnessStatus.Pass
def __bool__(self):
return bool(self.warn_after) or bool(self.error_after)
return self.warn_after is not None or self.error_after is not None
@dataclass
@@ -285,7 +279,7 @@ class UnparsedSourceDefinition(dbtClassMixin, Replaceable):
def __post_serialize__(self, dct):
dct = super().__post_serialize__(dct)
if 'freshness' not in dct and self.freshness is None:
if 'freshnewss' not in dct and self.freshness is None:
dct['freshness'] = None
return dct
@@ -446,27 +440,3 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
tags: List[str] = field(default_factory=list)
url: Optional[str] = None
depends_on: List[str] = field(default_factory=list)
@dataclass
class MetricFilter(dbtClassMixin, Replaceable):
field: str
operator: str
# TODO : Can we make this Any?
value: str
@dataclass
class UnparsedMetric(dbtClassMixin, Replaceable):
model: str
name: str
label: str
type: str
description: str = ''
sql: Optional[str] = None
timestamp: Optional[str] = None
time_grains: List[str] = field(default_factory=list)
dimensions: List[str] = field(default_factory=list)
filters: List[MetricFilter] = field(default_factory=list)
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)

View File

@@ -1,6 +1,7 @@
from dbt.contracts.util import Replaceable, Mergeable, list_str
from dbt.contracts.connection import QueryComment, UserConfigContract
from dbt.helper_types import NoValue
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError,
HyphenatedDbtClassMixin,
@@ -18,18 +19,6 @@ DEFAULT_SEND_ANONYMOUS_USAGE_STATS = True
class Name(ValidatedStringMixin):
ValidationRegex = r'^[^\d\W]\w*$'
@classmethod
def is_valid(cls, value: Any) -> bool:
if not isinstance(value, str):
return False
try:
cls.validate(value)
except ValidationError:
return False
return True
register_pattern(Name, r'^[^\d\W]\w*$')
@@ -243,13 +232,12 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
printer_width: Optional[int] = None
write_json: Optional[bool] = None
warn_error: Optional[bool] = None
log_format: Optional[str] = None
log_format: Optional[bool] = None
debug: Optional[bool] = None
version_check: Optional[bool] = None
fail_fast: Optional[bool] = None
use_experimental_parser: Optional[bool] = None
static_parser: Optional[bool] = None
indirect_selection: Optional[str] = None
@dataclass

View File

@@ -11,11 +11,10 @@ from dbt.contracts.util import (
schema_version,
)
from dbt.exceptions import InternalException
from dbt.events.functions import fire_event
from dbt.events.types import TimingInfoCollected
from dbt.logger import (
TimingProcessor,
JsonOnly,
GLOBAL_LOGGER as logger,
)
from dbt.utils import lowercase
from dbt.dataclass_schema import dbtClassMixin, StrEnum
@@ -55,13 +54,7 @@ class collect_timing_info:
def __exit__(self, exc_type, exc_value, traceback):
self.timing_info.end()
with JsonOnly(), TimingProcessor(self.timing_info):
fire_event(TimingInfoCollected())
class RunningStatus(StrEnum):
Started = 'started'
Compiling = 'compiling'
Executing = 'executing'
logger.debug('finished collecting timing info')
class NodeStatus(StrEnum):
@@ -192,7 +185,7 @@ class RunExecutionResult(
@dataclass
@schema_version('run-results', 4)
@schema_version('run-results', 3)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@@ -376,7 +369,7 @@ class FreshnessResult(ExecutionResult):
@dataclass
@schema_version('sources', 3)
@schema_version('sources', 2)
class FreshnessExecutionResultArtifact(
ArtifactMixin,
VersionedSchema,

819
core/dbt/contracts/rpc.py Normal file
View File

@@ -0,0 +1,819 @@
import enum
import os
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional, Union, List, Any, Dict, Type, Sequence
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
CatalogArtifact,
CatalogResults,
ExecutionResult,
FreshnessExecutionResultArtifact,
FreshnessResult,
RunOperationResult,
RunOperationResultsArtifact,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.exceptions import InternalException
from dbt.logger import LogMessage
from dbt.utils import restrict_to
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Inputs
@dataclass
class RPCParameters(dbtClassMixin):
task_tags: TaskTags
timeout: Optional[float]
@classmethod
def __pre_deserialize__(cls, data, omit_none=True):
data = super().__pre_deserialize__(data)
if 'timeout' not in data:
data['timeout'] = None
if 'task_tags' not in data:
data['task_tags'] = None
return data
@dataclass
class RPCExecParameters(RPCParameters):
name: str
sql: str
macros: Optional[str] = None
@dataclass
class RPCCompileParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCListParameters(RPCParameters):
resource_types: Optional[List[str]] = None
models: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
selector: Optional[str] = None
output: Optional[str] = 'json'
output_keys: Optional[List[str]] = None
@dataclass
class RPCRunParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSnapshotParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@dataclass
class RPCTestParameters(RPCCompileParameters):
data: bool = False
schema: bool = False
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCSeedParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
show: bool = False
state: Optional[str] = None
@dataclass
class RPCDocsGenerateParameters(RPCParameters):
compile: bool = True
state: Optional[str] = None
@dataclass
class RPCBuildParameters(RPCParameters):
resource_types: Optional[List[str]] = None
select: Union[None, str, List[str]] = None
threads: Optional[int] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
defer: Optional[bool] = None
@dataclass
class RPCCliParameters(RPCParameters):
cli: str
@dataclass
class RPCDepsParameters(RPCParameters):
pass
@dataclass
class KillParameters(RPCParameters):
task_id: TaskID
@dataclass
class PollParameters(RPCParameters):
request_token: TaskID
logs: bool = True
logs_start: int = 0
@dataclass
class PSParameters(RPCParameters):
active: bool = True
completed: bool = False
@dataclass
class StatusParameters(RPCParameters):
pass
@dataclass
class GCSettings(dbtClassMixin):
# start evicting the longest-ago-ended tasks here
maxsize: int
# start evicting all tasks before now - auto_reap_age when we have this
# many tasks in the table
reapsize: int
# a positive timedelta indicating how far back we should go
auto_reap_age: timedelta
@dataclass
class GCParameters(RPCParameters):
"""The gc endpoint takes three arguments, any of which may be present:
- task_ids: An optional list of task ID UUIDs to try to GC
- before: If provided, should be a datetime string. All tasks that finished
before that datetime will be GCed
- settings: If provided, should be a GCSettings object in JSON form. It
will be applied to the task manager before GC starts. By default the
existing gc settings remain.
"""
task_ids: Optional[List[TaskID]] = None
before: Optional[datetime] = None
settings: Optional[GCSettings] = None
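An illustrative request against this endpoint; the JSON-RPC method name 'gc' and the timedelta serialization shown are assumptions, while the field names match GCParameters above:

gc_request = {
    'jsonrpc': '2.0',
    'method': 'gc',  # assumed method name
    'id': 1,
    'params': {
        'task_ids': None,                  # or a list of task UUID strings
        'before': '2021-11-01T00:00:00Z',  # GC tasks that finished before this
        'settings': {'maxsize': 1000, 'reapsize': 500, 'auto_reap_age': 30},
    },
}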
@dataclass
class RPCRunOperationParameters(RPCParameters):
macro: str
args: Dict[str, Any] = field(default_factory=dict)
@dataclass
class RPCSourceFreshnessParameters(RPCParameters):
threads: Optional[int] = None
select: Union[None, str, List[str]] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
@dataclass
class GetManifestParameters(RPCParameters):
pass
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
@schema_version('remote-list-results', 1)
class RemoteListResults(RemoteResult):
output: List[Any]
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-deps-result', 1)
class RemoteDepsResult(RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@dataclass
@schema_version('remote-catalog-result', 1)
class RemoteCatalogResults(CatalogResults, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
artifact = CatalogArtifact.from_results(
generated_at=self.generated_at,
nodes=self.nodes,
sources=self.sources,
compile_results=self._compile_results,
errors=self.errors,
)
artifact.write(path)
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-operation-result', 1)
class RemoteRunOperationResult(RunOperationResult, RemoteResult):
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_local_result(
cls,
base: RunOperationResultsArtifact,
logs: List[LogMessage],
) -> 'RemoteRunOperationResult':
return cls(
generated_at=base.metadata.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
success=base.success,
logs=logs,
)
def write(self, path: str):
writable = RunOperationResultsArtifact.from_success(
success=self.success,
generated_at=self.generated_at,
elapsed_time=self.elapsed_time,
)
writable.write(path)
@dataclass
@schema_version('remote-freshness-result', 1)
class RemoteFreshnessResult(FreshnessResult, RemoteResult):
@classmethod
def from_local_result(
cls,
base: FreshnessResult,
logs: List[LogMessage],
) -> 'RemoteFreshnessResult':
return cls(
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
)
def write(self, path: str):
writable = FreshnessExecutionResultArtifact.from_result(base=self)
writable.write(path)
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)
RPCResult = Union[
RemoteCompileResult,
RemoteExecutionResult,
RemoteFreshnessResult,
RemoteCatalogResults,
RemoteDepsResult,
RemoteRunOperationResult,
]
# GC types
class GCResultState(StrEnum):
Deleted = 'deleted' # successful GC
Missing = 'missing' # nothing to GC
Running = 'running' # can't GC
@dataclass
@schema_version('remote-gc-result', 1)
class GCResult(RemoteResult):
logs: List[LogMessage] = field(default_factory=list)
deleted: List[TaskID] = field(default_factory=list)
missing: List[TaskID] = field(default_factory=list)
running: List[TaskID] = field(default_factory=list)
def add_result(self, task_id: TaskID, state: GCResultState):
if state == GCResultState.Missing:
self.missing.append(task_id)
elif state == GCResultState.Running:
self.running.append(task_id)
elif state == GCResultState.Deleted:
self.deleted.append(task_id)
else:
raise InternalException(
f'Got invalid state in add_result: {state}'
)
# Task management types
class TaskHandlerState(StrEnum):
NotStarted = 'not started'
Initializing = 'initializing'
Running = 'running'
Success = 'success'
Error = 'error'
Killed = 'killed'
Failed = 'failed'
def __lt__(self, other) -> bool:
"""A logical ordering for TaskHandlerState:
NotStarted < Initializing < Running < (Success, Error, Killed, Failed)
"""
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other not in smaller
return False
def __le__(self, other) -> bool:
# so that ((Success <= Error) is True)
return ((self < other) or
(self == other) or
(self.finished and other.finished))
def __gt__(self, other) -> bool:
if not isinstance(other, TaskHandlerState):
raise TypeError('cannot compare to non-TaskHandlerState')
order = (self.NotStarted, self.Initializing, self.Running)
smaller = set()
for value in order:
smaller.add(value)
if self == value:
return other in smaller
return other in smaller
def __ge__(self, other) -> bool:
# so that ((Success >= Error) is True)
return ((self > other) or
(self == other) or
(self.finished and other.finished))
@property
def finished(self) -> bool:
return self in (self.Error, self.Success, self.Killed, self.Failed)
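A few spot checks of the ordering these methods define:

assert TaskHandlerState.NotStarted < TaskHandlerState.Running
assert TaskHandlerState.Running < TaskHandlerState.Success
assert TaskHandlerState.Success <= TaskHandlerState.Error   # both finished
assert not (TaskHandlerState.Success < TaskHandlerState.Error)
assert TaskHandlerState.Success.finished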
@dataclass
class TaskTiming(dbtClassMixin):
state: TaskHandlerState
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
class TaskRow(TaskTiming):
task_id: TaskID
request_source: str
method: str
request_id: Union[str, int]
tags: TaskTags = None
timeout: Optional[float] = None
@dataclass
@schema_version('remote-ps-result', 1)
class PSResult(RemoteResult):
rows: List[TaskRow]
class KillResultStatus(StrEnum):
Missing = 'missing'
NotStarted = 'not_started'
Killed = 'killed'
Finished = 'finished'
@dataclass
@schema_version('remote-kill-result', 1)
class KillResult(RemoteResult):
state: KillResultStatus = KillResultStatus.Missing
logs: List[LogMessage] = field(default_factory=list)
@dataclass
@schema_version('remote-manifest-result', 1)
class GetManifestResult(RemoteResult):
manifest: Optional[WritableManifest] = None
# this is kind of carefully structured: BlocksManifestTasks is implied by
# RequiresConfigReloadBefore and RequiresManifestReloadAfter
class RemoteMethodFlags(enum.Flag):
Empty = 0
BlocksManifestTasks = 1
RequiresConfigReloadBefore = 3
RequiresManifestReloadAfter = 5
Builtin = 8
# Polling types
@dataclass
class PollResult(RemoteResult, TaskTiming):
state: TaskHandlerState
tags: TaskTags
start: Optional[datetime]
end: Optional[datetime]
elapsed: Optional[float]
# These ought to be defaults but superclass order doesn't
# allow that to work
@classmethod
def __pre_deserialize__(cls, data):
data = super().__pre_deserialize__(data)
for field_name in ('start', 'end', 'elapsed'):
if field_name not in data:
data[field_name] = None
return data
@dataclass
@schema_version('poll-remote-deps-result', 1)
class PollRemoteEmptyCompleteResult(PollResult, RemoteResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
generated_at: datetime = field(default_factory=datetime.utcnow)
@classmethod
def from_result(
cls: Type['PollRemoteEmptyCompleteResult'],
base: RemoteDepsResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRemoteEmptyCompleteResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-killed-result', 1)
class PollKilledResult(PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Killed),
)
@dataclass
@schema_version('poll-remote-execution-result', 1)
class PollExecuteCompleteResult(
RemoteExecutionResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollExecuteCompleteResult'],
base: RemoteExecutionResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollExecuteCompleteResult':
return cls(
results=base.results,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at,
)
@dataclass
@schema_version('poll-remote-compile-result', 1)
class PollCompileCompleteResult(
RemoteCompileResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCompileCompleteResult'],
base: RemoteCompileResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCompileCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-result', 1)
class PollRunCompleteResult(
RemoteRunResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunCompleteResult'],
base: RemoteRunResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunCompleteResult':
return cls(
raw_sql=base.raw_sql,
compiled_sql=base.compiled_sql,
node=base.node,
timing=base.timing,
logs=logs,
table=base.table,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
generated_at=base.generated_at
)
@dataclass
@schema_version('poll-remote-run-operation-result', 1)
class PollRunOperationCompleteResult(
RemoteRunOperationResult,
PollResult,
):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollRunOperationCompleteResult'],
base: RemoteRunOperationResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollRunOperationCompleteResult':
return cls(
success=base.success,
results=base.results,
generated_at=base.generated_at,
elapsed_time=base.elapsed_time,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-catalog-result', 1)
class PollCatalogCompleteResult(RemoteCatalogResults, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollCatalogCompleteResult'],
base: RemoteCatalogResults,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollCatalogCompleteResult':
return cls(
nodes=base.nodes,
sources=base.sources,
generated_at=base.generated_at,
errors=base.errors,
_compile_results=base._compile_results,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-in-progress-result', 1)
class PollInProgressResult(PollResult):
pass
@dataclass
@schema_version('poll-remote-get-manifest-result', 1)
class PollGetManifestResult(GetManifestResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollGetManifestResult'],
base: GetManifestResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollGetManifestResult':
return cls(
manifest=base.manifest,
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
)
@dataclass
@schema_version('poll-remote-freshness-result', 1)
class PollFreshnessResult(RemoteFreshnessResult, PollResult):
state: TaskHandlerState = field(
metadata=restrict_to(TaskHandlerState.Success,
TaskHandlerState.Failed),
)
@classmethod
def from_result(
cls: Type['PollFreshnessResult'],
base: RemoteFreshnessResult,
tags: TaskTags,
timing: TaskTiming,
logs: List[LogMessage],
) -> 'PollFreshnessResult':
return cls(
logs=logs,
tags=tags,
state=timing.state,
start=timing.start,
end=timing.end,
elapsed=timing.elapsed,
metadata=base.metadata,
results=base.results,
elapsed_time=base.elapsed_time,
)
# Manifest parsing types
class ManifestStatus(StrEnum):
Init = 'init'
Compiling = 'compiling'
Ready = 'ready'
Error = 'error'
@dataclass
@schema_version('remote-status-result', 1)
class LastParse(RemoteResult):
state: ManifestStatus = ManifestStatus.Init
logs: List[LogMessage] = field(default_factory=list)
error: Optional[Dict[str, Any]] = None
timestamp: datetime = field(default_factory=datetime.utcnow)
pid: int = field(default_factory=os.getpid)

View File

@@ -1,88 +0,0 @@
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, List, Any, Dict, Sequence
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.results import (
RunResult, RunResultsArtifact, TimingInfo,
ExecutionResult,
RunExecutionResult,
)
from dbt.contracts.util import VersionedSchema, schema_version
from dbt.logger import LogMessage
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
# Outputs
@dataclass
class RemoteResult(VersionedSchema):
logs: List[LogMessage]
@dataclass
class RemoteCompileResultMixin(RemoteResult):
raw_sql: str
compiled_sql: str
node: CompileResultNode
timing: List[TimingInfo]
@dataclass
@schema_version('remote-compile-result', 1)
class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self):
return None
@dataclass
@schema_version('remote-execution-result', 1)
class RemoteExecutionResult(ExecutionResult, RemoteResult):
results: Sequence[RunResult]
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
elapsed_time=self.elapsed_time,
args=self.args,
)
writable.write(path)
@classmethod
def from_local_result(
cls,
base: RunExecutionResult,
logs: List[LogMessage],
) -> 'RemoteExecutionResult':
return cls(
generated_at=base.generated_at,
results=base.results,
elapsed_time=base.elapsed_time,
args=base.args,
logs=logs,
)
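Usage is a two-step sketch for callers (`local_result` is a hypothetical `RunExecutionResult`; the path is illustrative):
```
# Wrap a local run result for the RPC layer, then persist it as an artifact.
remote = RemoteExecutionResult.from_local_result(base=local_result, logs=[])
remote.write('target/run_results.json')
```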
@dataclass
class ResultTable(dbtClassMixin):
column_names: List[str]
rows: List[Any]
@dataclass
@schema_version('remote-run-result', 1)
class RemoteRunResult(RemoteCompileResultMixin):
table: ResultTable
generated_at: datetime = field(default_factory=datetime.utcnow)

View File

@@ -14,8 +14,7 @@ class PreviousState:
manifest_path = self.path / 'manifest.json'
if manifest_path.exists() and manifest_path.is_file():
try:
# we want to bail with an error if schema versions don't match
self.manifest = WritableManifest.read_and_check_versions(str(manifest_path))
self.manifest = WritableManifest.read(str(manifest_path))
except IncompatibleSchemaException as exc:
exc.add_filename(str(manifest_path))
raise
@@ -23,8 +22,7 @@ class PreviousState:
results_path = self.path / 'run_results.json'
if results_path.exists() and results_path.is_file():
try:
# we want to bail with an error if schema versions don't match
self.results = RunResultsArtifact.read_and_check_versions(str(results_path))
self.results = RunResultsArtifact.read(str(results_path))
except IncompatibleSchemaException as exc:
exc.add_filename(str(results_path))
raise

View File

@@ -9,10 +9,9 @@ from dbt.clients.system import write_json, read_json
from dbt.exceptions import (
InternalException,
RuntimeException,
IncompatibleSchemaException
)
from dbt.version import __version__
from dbt.events.functions import get_invocation_id
from dbt.tracking import get_invocation_id
from dbt.dataclass_schema import dbtClassMixin
SourceKey = Tuple[str, str]
@@ -159,8 +158,6 @@ def get_metadata_env() -> Dict[str, str]:
}
# This is used in the ManifestMetadata, RunResultsMetadata, RunOperationResultMetadata,
# FreshnessMetadata, and CatalogMetadata classes
@dataclasses.dataclass
class BaseArtifactMetadata(dbtClassMixin):
dbt_schema_version: str
@@ -180,17 +177,6 @@ class BaseArtifactMetadata(dbtClassMixin):
return dct
# This is used as a class decorator to set the schema_version in the
# 'dbt_schema_version' class attribute. (It's copied into the metadata objects.)
# Name attributes of SchemaVersion in classes with the 'schema_version' decorator:
# manifest
# run-results
# run-operation-result
# sources
# catalog
# remote-compile-result
# remote-execution-result
# remote-run-result
def schema_version(name: str, version: int):
def inner(cls: Type[VersionedSchema]):
cls.dbt_schema_version = SchemaVersion(
@@ -201,7 +187,6 @@ def schema_version(name: str, version: int):
return inner
# This is used in the ArtifactMixin and RemoteResult classes
@dataclasses.dataclass
class VersionedSchema(dbtClassMixin):
dbt_schema_version: ClassVar[SchemaVersion]
@@ -213,30 +198,6 @@ class VersionedSchema(dbtClassMixin):
result['$id'] = str(cls.dbt_schema_version)
return result
@classmethod
def read_and_check_versions(cls, path: str):
try:
data = read_json(path)
except (EnvironmentError, ValueError) as exc:
raise RuntimeException(
f'Could not read {cls.__name__} at "{path}" as JSON: {exc}'
) from exc
# Check metadata version. There is a class variable 'dbt_schema_version', but
# that doesn't show up in artifacts, where it only exists in the 'metadata'
# dictionary.
if hasattr(cls, 'dbt_schema_version'):
if 'metadata' in data and 'dbt_schema_version' in data['metadata']:
previous_schema_version = data['metadata']['dbt_schema_version']
# cls.dbt_schema_version is a SchemaVersion object
if str(cls.dbt_schema_version) != previous_schema_version:
raise IncompatibleSchemaException(
expected=str(cls.dbt_schema_version),
found=previous_schema_version
)
return cls.from_dict(data) # type: ignore
T = TypeVar('T', bound='ArtifactMixin')
@@ -244,8 +205,6 @@ T = TypeVar('T', bound='ArtifactMixin')
# metadata should really be a Generic[T_M] where T_M is a TypeVar bound to
# BaseArtifactMetadata. Unfortunately this isn't possible due to a mypy issue:
# https://github.com/python/mypy/issues/7520
# This is used in the WritableManifest, RunResultsArtifact, RunOperationResultsArtifact,
# and CatalogArtifact
@dataclasses.dataclass(init=False)
class ArtifactMixin(VersionedSchema, Writable, Readable):
metadata: BaseArtifactMetadata

View File

@@ -22,7 +22,7 @@ class DateTimeSerialization(SerializationStrategy):
out = value.isoformat()
# Assume UTC if timezone is missing
if value.tzinfo is None:
out += "Z"
out = out + "Z"
return out
def deserialize(self, value):

View File

@@ -36,9 +36,9 @@ class DBTDeprecation:
if self.name not in active_deprecations:
desc = self.description.format(**kwargs)
msg = ui.line_wrap_message(
desc, prefix='Deprecated functionality\n\n'
desc, prefix='* Deprecation Warning: '
)
dbt.exceptions.warn_or_error(msg, log_fmt=ui.warning_tag('{}'))
dbt.exceptions.warn_or_error(msg)
self.track_deprecation_warn()
active_deprecations.add(self.name)
@@ -61,20 +61,13 @@ class PackageInstallPathDeprecation(DBTDeprecation):
class ConfigPathDeprecation(DBTDeprecation):
_name = 'project_config_path'
_description = '''\
The `{deprecated_path}` config has been renamed to `{exp_path}`.
The `{deprecated_path}` config has been deprecated in favor of `{exp_path}`.
Please update your `dbt_project.yml` configuration to reflect this change.
'''
class ConfigSourcePathDeprecation(ConfigPathDeprecation):
_name = 'project-config-source-paths'
class ConfigDataPathDeprecation(ConfigPathDeprecation):
_name = 'project-config-data-paths'
_adapter_renamed_description = """\
The adapter function `adapter.{old_name}` is deprecated and will be removed in
a future release of dbt. Please use `adapter.{new_name}` instead.
@@ -113,8 +106,7 @@ def warn(name, *args, **kwargs):
active_deprecations: Set[str] = set()
deprecations_list: List[DBTDeprecation] = [
ConfigSourcePathDeprecation(),
ConfigDataPathDeprecation(),
ConfigPathDeprecation(),
PackageInstallPathDeprecation(),
PackageRedirectDeprecation()
]

View File

@@ -1 +0,0 @@
# Deps README

View File

@@ -6,8 +6,7 @@ from typing import List, Optional, Generic, TypeVar
from dbt.clients import system
from dbt.contracts.project import ProjectPackageMetadata
from dbt.events.functions import fire_event
from dbt.events.types import DepsSetDownloadDirectory
from dbt.logger import GLOBAL_LOGGER as logger
DOWNLOADS_PATH = None
@@ -32,7 +31,7 @@ def downloads_directory():
remove_downloads = True
system.make_directory(DOWNLOADS_PATH)
fire_event(DepsSetDownloadDirectory(path=DOWNLOADS_PATH))
logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH))
yield DOWNLOADS_PATH

View File

@@ -12,8 +12,7 @@ from dbt.deps.base import PinnedPackage, UnpinnedPackage, get_downloads_path
from dbt.exceptions import (
ExecutableError, warn_or_error, raise_dependency_error
)
from dbt.events.functions import fire_event
from dbt.events.types import EnsureGitInstalled
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import ui
PIN_PACKAGE_URL = 'https://docs.getdbt.com/docs/package-management#section-specifying-package-versions' # noqa
@@ -82,7 +81,11 @@ class GitPinnedPackage(GitPackageMixin, PinnedPackage):
)
except ExecutableError as exc:
if exc.cmd and exc.cmd[0] == 'git':
fire_event(EnsureGitInstalled())
logger.error(
'Make sure git is installed on your machine. More '
'information: '
'https://docs.getdbt.com/docs/package-management'
)
raise
return os.path.join(get_downloads_path(), dir_)

View File

@@ -6,8 +6,7 @@ from dbt.contracts.project import (
ProjectPackageMetadata,
LocalPackage,
)
from dbt.events.functions import fire_event
from dbt.events.types import DepsCreatingLocalSymlink, DepsSymlinkNotAvailable
from dbt.logger import GLOBAL_LOGGER as logger
class LocalPackageMixin:
@@ -58,11 +57,12 @@ class LocalPinnedPackage(LocalPackageMixin, PinnedPackage):
system.remove_file(dest_path)
if can_create_symlink:
fire_event(DepsCreatingLocalSymlink())
logger.debug(' Creating symlink to local dependency.')
system.make_symlink(src_path, dest_path)
else:
fire_event(DepsSymlinkNotAvailable())
logger.debug(' Symlinks are not available on this '
'OS, copying dependency.')
shutil.copytree(src_path, dest_path)

View File

@@ -1,5 +1,4 @@
import os
import functools
from typing import List
from dbt import semver
@@ -15,7 +14,6 @@ from dbt.exceptions import (
DependencyException,
package_not_found,
)
from dbt.utils import _connection_exception_retry as connection_exception_retry
class RegistryPackageMixin:
@@ -70,28 +68,9 @@ class RegistryPinnedPackage(RegistryPackageMixin, PinnedPackage):
system.make_directory(os.path.dirname(tar_path))
download_url = metadata.downloads.tarball
system.download_with_retries(download_url, tar_path)
deps_path = project.packages_install_path
package_name = self.get_project_name(project, renderer)
download_untar_fn = functools.partial(
self.download_and_untar,
download_url,
tar_path,
deps_path,
package_name
)
connection_exception_retry(download_untar_fn, 5)
def download_and_untar(self, download_url, tar_path, deps_path, package_name):
"""
Sometimes the download of the files fails and we want to retry. Sometimes the
download appears successful but the file did not make it through as expected
(generally due to a github incident). Either way we want to retry downloading
and untarring to see if we can get a success. Call this within
`_connection_exception_retry`
"""
system.download(download_url, tar_path)
system.untar_package(tar_path, deps_path, package_name)
@@ -148,12 +127,9 @@ class RegistryUnpinnedPackage(
raise DependencyException(new_msg) from e
available = registry.get_available_versions(self.package)
prerelease_version_specified = any(
bool(version.prerelease) for version in self.versions
)
installable = semver.filter_installable(
available,
self.install_prerelease or prerelease_version_specified
self.install_prerelease
)
available_latest = installable[-1]

View File

@@ -3,6 +3,7 @@ from typing import Dict, List, NoReturn, Union, Type, Iterator, Set
from dbt.exceptions import raise_dependency_error, InternalException
from dbt.context.target import generate_target_context
from dbt.config import Project, RuntimeConfig
from dbt.config.renderer import DbtProjectYamlRenderer
from dbt.deps.base import BasePackage, PinnedPackage, UnpinnedPackage
@@ -125,7 +126,8 @@ def resolve_packages(
pending = PackageListing.from_contracts(packages)
final = PackageListing()
renderer = DbtProjectYamlRenderer(config, config.cli_vars)
ctx = generate_target_context(config, config.cli_vars)
renderer = DbtProjectYamlRenderer(ctx)
while pending:
next_pending = PackageListing()

View File

@@ -1,55 +1,12 @@
# Events Module
The Events module is responsible for communicating internal dbt structures into a consumable interface. Right now, the events module is exclusively used for structured logging, but in the future it could grow to include other user-facing components such as exceptions. These events represent both a programmatic interface to dbt processes as well as human-readable messaging in one centralized place. The centralization allows for leveraging mypy to enforce interface invariants across all dbt events, and the distinct type layer allows for decoupling events and libraries such as loggers.
The Events module is the implementation for structured logging. These events represent both a programmatic interface to dbt processes as well as human-readable messaging in one centralized place. The centralization allows for leveraging mypy to enforce interface invariants across all dbt events, and the distinct type layer allows for decoupling events and libraries such as loggers.
# Using the Events Module
The events module provides types in `events.types` that represent what is happening in dbt. These types are intended to be an exhaustive list of everything happening within dbt that needs to be logged, streamed, or printed. To fire an event, `events.functions::fire_event` is the entry point to the module from everywhere in dbt.
# Logging
When events are processed via `fire_event`, nearly everything is logged. Whether or not the user has enabled the debug flag, all debug messages are still logged to the file. However, some events are particularly time-consuming to construct because they return a huge amount of data. Today, the only messages in this category are cache events, and they are only logged when the `--log-cache-events` flag is on. These messages should not be constructed unless they are actually going to be logged, because building them causes a noticeable performance degradation. We achieve this by making the event class explicitly use lazy values for the expensive fields so they are not computed until the moment they are required. This is done with the data type `core/dbt/helper_types.py::Lazy`, which includes usage documentation.
Example:
```
@dataclass
class DumpBeforeAddGraph(DebugLevel, Cache):
dump: Lazy[Dict[str, List[str]]]
code: str = "E031"
def message(self) -> str:
return f"before adding : {self.dump.force()}"
```
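Firing such an event might then look like this (a sketch: it assumes `Lazy.defer` is the constructor documented in `core/dbt/helper_types.py`, and `build_dump()` stands in for the expensive computation):
```
# build_dump() is only invoked if the event is actually rendered,
# i.e. when --log-cache-events is on.
fire_event(DumpBeforeAddGraph(dump=Lazy.defer(lambda: build_dump())))
```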
# Adding a New Event
In `events.types` add a new class that represents the new event. Every event must be a dataclass with, at minimum, a code. You may also include other values used to construct downstream messaging. Only include the data necessary to construct this message within this class. You must extend all destinations (e.g. if your log message belongs on the cli, extend `Cli`) as well as the log level this event belongs to. This system has been designed to take full advantage of mypy, so running it will catch anything you may miss.
## Required for Every Event
- a string attribute `code` that's unique across events
- assign a log level by extending `DebugLevel`, `InfoLevel`, `WarnLevel`, or `ErrorLevel`
- a `message()` method
- extend `File` and/or `Cli` based on where it should output
Example
```
@dataclass
class PartialParsingDeletedExposure(DebugLevel, Cli, File):
unique_id: str
code: str = "I049"
def message(self) -> str:
return f"Partial parsing: deleted exposure {self.unique_id}"
```
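Firing it from a call site is then one line (the `unique_id` value here is illustrative):
```
from dbt.events.functions import fire_event
from dbt.events.types import PartialParsingDeletedExposure

fire_event(PartialParsingDeletedExposure(unique_id='exposure.my_project.weekly_kpis'))
```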
## Optional (based on your event)
- Events associated with node status changes must be extended with `NodeInfo` which contains a node_info attribute
All values other than `code` and `node_info` will be included in the `data` node of the json log output.
Once your event has been added, add a dummy call to your new event at the bottom of `types.py` and also add your new Event to the list `sample_values` in `test/unit/test_events.py`.
In `events.types` add a new class that represents the new event. This may be a simple class with no values, or it may be a dataclass with some values to construct downstream messaging. Only include the data necessary to construct this message within this class. You must extend all destinations (e.g. - if your log message belongs on the cli, extend `CliEventABC`) as well as the loglevel this event belongs to.
# Adapter Maintainers
To integrate existing log messages from adapters, you likely have a line of code like this in your adapter already:

View File

@@ -3,63 +3,84 @@ from dbt.events.functions import fire_event
from dbt.events.types import (
AdapterEventDebug, AdapterEventInfo, AdapterEventWarning, AdapterEventError
)
from typing import Any
@dataclass
class AdapterLogger():
name: str
def debug(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventDebug(name=self.name, base_msg=msg, args=args)
def debug(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventDebug(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def info(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventInfo(name=self.name, base_msg=msg, args=args)
def info(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventInfo(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def warning(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventWarning(name=self.name, base_msg=msg, args=args)
def warning(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventWarning(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
def error(self, msg, *args, exc_info=None, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
def error(
self,
msg: str,
exc_info: Any = None,
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
# The default exc_info=True is what makes this method different
def exception(self, msg, *args, exc_info=True, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
def exception(
self,
msg: str,
exc_info: Any = True, # this default is what makes this method different
stack_info: Any = None,
extra: Any = None
):
event = AdapterEventError(name=self.name, raw_msg=msg)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
fire_event(event)
def critical(self, msg, *args, exc_info=False, extra=None, stack_info=False):
event = AdapterEventError(name=self.name, base_msg=msg, args=args)
event.exc_info = exc_info
event.extra = extra
event.stack_info = stack_info
event.extra = extra
fire_event(event)
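For adapter code, the intended usage mirrors a stdlib logger; a sketch against the newer signature (the adapter name, messages, and `connect()` call are illustrative):
```
logger = AdapterLogger("Postgres")
logger.debug("Opening a new connection, total open: %s", 1)
try:
    connect()  # hypothetical adapter call
except Exception:
    # exception() defaults exc_info=True, so the traceback is attached
    logger.exception("Connection failed")
```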

View File

@@ -1,120 +0,0 @@
from abc import ABCMeta, abstractproperty, abstractmethod
from dataclasses import dataclass
from dbt.events.serialization import EventSerialization
import os
import threading
from typing import Any, Dict
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# These base types define the _required structure_ for the concrete event #
# types defined in types.py #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
class Cache():
# Events with this class will only be logged when the `--log-cache-events` flag is passed
pass
@dataclass
class ShowException():
# N.B.:
# As long as we stick with the current convention of setting the member vars in the
# `message` method of subclasses, this is a safe operation.
# If that ever changes we'll want to reassess.
def __post_init__(self):
self.exc_info: Any = True
self.stack_info: Any = None
self.extra: Any = None
# TODO add exhaustiveness checking for subclasses
# top-level superclass for all events
class Event(metaclass=ABCMeta):
# Do not define fields with defaults here
# four digit string code that uniquely identifies this type of event
# uniqueness and valid characters are enforced by tests
@abstractproperty
@staticmethod
def code() -> str:
raise Exception("code() not implemented for event")
# The 'to_dict' method is added by mashumaro via the EventSerialization.
# It should be in all subclasses that are to record actual events.
@abstractmethod
def to_dict(self):
raise Exception('to_dict not implemented for Event')
# do not define this yourself. inherit it from one of the above level types.
@abstractmethod
def level_tag(self) -> str:
raise Exception("level_tag not implemented for Event")
# Solely the human readable message. Timestamps and formatting will be added by the logger.
# Must override yourself
@abstractmethod
def message(self) -> str:
raise Exception("msg not implemented for Event")
# exactly one pid per concrete event
def get_pid(self) -> int:
return os.getpid()
# in theory threads can change so we don't cache them.
def get_thread_name(self) -> str:
return threading.current_thread().getName()
@classmethod
def get_invocation_id(cls) -> str:
from dbt.events.functions import get_invocation_id
return get_invocation_id()
# in preparation for #3977
@dataclass # type: ignore[misc]
class TestLevel(EventSerialization, Event):
def level_tag(self) -> str:
return "test"
@dataclass # type: ignore[misc]
class DebugLevel(EventSerialization, Event):
def level_tag(self) -> str:
return "debug"
@dataclass # type: ignore[misc]
class InfoLevel(EventSerialization, Event):
def level_tag(self) -> str:
return "info"
@dataclass # type: ignore[misc]
class WarnLevel(EventSerialization, Event):
def level_tag(self) -> str:
return "warn"
@dataclass # type: ignore[misc]
class ErrorLevel(EventSerialization, Event):
def level_tag(self) -> str:
return "error"
# prevents an event from going to the file
class NoFile():
pass
# prevents an event from going to stdout
class NoStdOut():
pass
# This class represents the node_info which is generated
# by the NodeInfoMixin class in dbt.contracts.graph.parsed
@dataclass
class NodeInfo():
node_info: Dict[str, Any]
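Putting these base types together, a concrete event in `types.py` follows this pattern (the class below is hypothetical, modeled on the ones in `test_types.py`):
```
@dataclass
class PluginLoaded(DebugLevel, NoFile):  # hypothetical example event
    plugin_name: str
    code: str = "Z001"  # must be unique across events; enforced by tests

    def message(self) -> str:
        return f"Loaded plugin: {self.plugin_name}"
```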

View File

@@ -1,49 +0,0 @@
from dbt import ui
from dbt.node_types import NodeType
from typing import Optional, Union
def format_fancy_output_line(
msg: str, status: str, index: Optional[int],
total: Optional[int], execution_time: Optional[float] = None,
truncate: bool = False
) -> str:
if index is None or total is None:
progress = ''
else:
progress = '{} of {} '.format(index, total)
prefix = "{progress}{message}".format(
progress=progress,
message=msg)
truncate_width = ui.printer_width() - 3
justified = prefix.ljust(ui.printer_width(), ".")
if truncate and len(justified) > truncate_width:
justified = justified[:truncate_width] + '...'
if execution_time is None:
status_time = ""
else:
status_time = " in {execution_time:0.2f}s".format(
execution_time=execution_time)
output = "{justified} [{status}{status_time}]".format(
justified=justified, status=status, status_time=status_time)
return output
def _pluralize(string: Union[str, NodeType]) -> str:
try:
convert = NodeType(string)
except ValueError:
return f'{string}s'
else:
return convert.pluralize()
def pluralize(count, string: Union[str, NodeType]):
pluralized: str = str(string)
if count != 1:
pluralized = _pluralize(string)
return f'{count} {pluralized}'
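Worked examples of the helpers above (the `analysis` case assumes `NodeType.pluralize()` special-cases it, which is why the naive `f'{string}s'` fallback isn't used):
```
assert pluralize(1, 'model') == '1 model'        # count == 1: no pluralization
assert pluralize(3, 'model') == '3 models'       # NodeType('model') -> 'models'
assert pluralize(2, 'analysis') == '2 analyses'  # via NodeType, not 'analysiss'
```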

View File

@@ -1,315 +1,162 @@
import colorama
from colorama import Style
import dbt.events.functions as this # don't worry I hate it too.
from dbt.events.base_types import NoStdOut, Event, NoFile, ShowException, Cache
from dbt.events.types import EventBufferFull, T_Event, MainReportVersion, EmptyLine
from dbt.events.history import EVENT_HISTORY
from dbt.events.types import CliEventABC, Event, ShowException
import dbt.logger as logger # TODO remove references to this logger
import dbt.flags as flags
# TODO this will need to move eventually
from dbt.logger import SECRET_ENV_PREFIX, make_log_dir_if_missing, GLOBAL_LOGGER
from datetime import datetime
import json
import io
from io import StringIO, TextIOWrapper
import logbook
import logging.config
import logging
from logging import Logger
import sys
from logging.handlers import RotatingFileHandler
import os
import uuid
import threading
from typing import Any, Dict, List, Optional, Union
from collections import deque
global LOG_VERSION
LOG_VERSION = 2
# create the global event history buffer with the default max size (10k)
# python 3.7 doesn't support type hints on globals, but mypy requires them. hence the ignore.
# TODO the flags module has not yet been resolved when this is created
global EVENT_HISTORY
EVENT_HISTORY = deque(maxlen=flags.EVENT_BUFFER_SIZE) # type: ignore
# create the global file logger with no configuration
global FILE_LOG
FILE_LOG = logging.getLogger('default_file')
null_handler = logging.NullHandler()
FILE_LOG.addHandler(null_handler)
# set up logger to go to stdout with defaults
# setup_event_logger will be called once args have been parsed
global STDOUT_LOG
STDOUT_LOG = logging.getLogger('default_stdout')
STDOUT_LOG.setLevel(logging.INFO)
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setLevel(logging.INFO)
STDOUT_LOG.addHandler(stdout_handler)
format_color = True
format_json = False
invocation_id: Optional[str] = None
# Colorama needs some help on windows because we're using logger.info
# instead of print(). If the Windows env doesn't have a TERM var set,
# then we should override the logging stream to use the colorama
# converter. If the TERM var is set (as with Git Bash), then it's safe
# to send escape characters and no log handler injection is needed.
colorama_stdout = sys.stdout
colorama_wrap = True
colorama.init(wrap=colorama_wrap)
if sys.platform == 'win32' and not os.getenv('TERM'):
colorama_wrap = False
colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream
elif sys.platform == 'win32':
colorama_wrap = False
colorama.init(wrap=colorama_wrap)
import structlog
import sys
def setup_event_logger(log_path, level_override=None):
# flags have been resolved, and log_path is known
global EVENT_HISTORY
EVENT_HISTORY = deque(maxlen=flags.EVENT_BUFFER_SIZE) # type: ignore
# these two loggers will be set up with CLI inputs via setup_event_logger
# DO NOT IMPORT AND USE THESE DIRECTLY
make_log_dir_if_missing(log_path)
this.format_json = flags.LOG_FORMAT == 'json'
global STDOUT_LOGGER
STDOUT_LOGGER = structlog.get_logger()
global FILE_LOGGER
FILE_LOGGER = structlog.get_logger()
def setup_event_logger(log_path):
logger.make_log_dir_if_missing(log_path)
json: bool = flags.LOG_FORMAT == 'json'
# USE_COLORS can be None if the app just started and the cli flags
# haven't been applied yet
this.format_color = True if flags.USE_COLORS else False
colors: bool = True if flags.USE_COLORS else False
# TODO this default should live somewhere better
log_dest = os.path.join(log_path, 'dbt.log')
level = level_override or (logging.DEBUG if flags.DEBUG else logging.INFO)
log_dest = os.path.join(logger.LOG_DIR, 'dbt.log')
# overwrite the STDOUT_LOG logger with the configured one
this.STDOUT_LOG = logging.getLogger('configured_std_out')
this.STDOUT_LOG.setLevel(level)
# see: https://docs.python.org/3/library/logging.config.html#logging-config-dictschema
# logging.config.dictConfig({
# "version": 1,
# "disable_existing_loggers": False,
# "formatters": {
# "plain": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.dev.ConsoleRenderer(colors=False),
# "foreign_pre_chain": pre_chain,
# },
# "colored": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.dev.ConsoleRenderer(colors=True),
# "foreign_pre_chain": pre_chain,
# },
# "json": {
# "()": structlog.stdlib.ProcessorFormatter,
# "processor": structlog.processors.JSONRenderer(),
# "foreign_pre_chain": pre_chain,
# },
# },
# "handlers": {
# "console": {
# "level": "DEBUG",
# "class": "logging.StreamHandler",
# "formatter": "colored",
# },
# "file": {
# "level": "DEBUG",
# "class": "logging.handlers.WatchedFileHandler",
# # TODO this default should live somewhere better
# "filename": os.path.join(logger.LOG_DIR, 'dbt.log'),
# "formatter": "plain",
# },
# "json-console": {
# "level": "DEBUG",
# "class": "logging.StreamHandler",
# "formatter": "json",
# },
# "json-file": {
# "level": "DEBUG",
# "class": "logging.handlers.WatchedFileHandler",
# # TODO this default should live somewhere better
# "filename": os.path.join(logger.LOG_DIR, 'dbt.log.json'),
# "formatter": "json",
# },
# },
# "loggers": {
# "": {
# "handlers": ["json-console", "json-file"] if json else ["console", "file"],
# "level": "DEBUG" if flags.DEBUG else "INFO",
# "propagate": True,
# },
# }
# })
FORMAT = "%(message)s"
stdout_passthrough_formatter = logging.Formatter(fmt=FORMAT)
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(stdout_passthrough_formatter)
stdout_handler.setLevel(level)
# clear existing stdout TextIOWrapper stream handlers
this.STDOUT_LOG.handlers = [
h for h in this.STDOUT_LOG.handlers
if not (hasattr(h, 'stream') and isinstance(h.stream, TextIOWrapper)) # type: ignore
]
this.STDOUT_LOG.addHandler(stdout_handler)
# overwrite the FILE_LOG logger with the configured one
this.FILE_LOG = logging.getLogger('configured_file')
this.FILE_LOG.setLevel(logging.DEBUG) # always debug regardless of user input
file_passthrough_formatter = logging.Formatter(fmt=FORMAT)
file_handler = RotatingFileHandler(
filename=log_dest,
encoding='utf8',
maxBytes=10 * 1024 * 1024, # 10 mb
backupCount=5
# set up global logging configurations
structlog.configure(
wrapper_class=structlog.stdlib.BoundLogger,
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=False,
)
file_handler.setFormatter(file_passthrough_formatter)
file_handler.setLevel(logging.DEBUG) # always debug regardless of user input
this.FILE_LOG.handlers.clear()
this.FILE_LOG.addHandler(file_handler)
# configure the stdout logger
STDOUT_LOGGER = structlog.wrap_logger(
logger=logging.Logger('console logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper("%H:%M:%S"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
]
)
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.dev.ConsoleRenderer(colors=colors),
)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(formatter)
STDOUT_LOGGER.addHandler(handler)
# used for integration tests
def capture_stdout_logs() -> StringIO:
capture_buf = io.StringIO()
stdout_capture_handler = logging.StreamHandler(capture_buf)
stdout_handler.setLevel(logging.DEBUG)
this.STDOUT_LOG.addHandler(stdout_capture_handler)
return capture_buf
# used for integration tests
def stop_capture_stdout_logs() -> None:
this.STDOUT_LOG.handlers = [
h for h in this.STDOUT_LOG.handlers
if not (hasattr(h, 'stream') and isinstance(h.stream, StringIO)) # type: ignore
]
def env_secrets() -> List[str]:
return [
v for k, v in os.environ.items()
if k.startswith(SECRET_ENV_PREFIX)
]
def scrub_secrets(msg: str, secrets: List[str]) -> str:
scrubbed = msg
for secret in secrets:
scrubbed = scrubbed.replace(secret, "*****")
return scrubbed
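These two helpers are used together; a sketch assuming `SECRET_ENV_PREFIX` follows the `DBT_ENV_SECRET_` convention:
```
# With DBT_ENV_SECRET_GIT_TOKEN=hunter2 in the environment,
# env_secrets() returns ['hunter2'], so the token is masked:
msg = scrub_secrets("cloning https://user:hunter2@host/repo.git", env_secrets())
# -> "cloning https://user:*****@host/repo.git"
```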
# returns a dictionary representation of the event fields.
# the message may contain secrets which must be scrubbed at the usage site.
def event_to_serializable_dict(
e: T_Event,
) -> Dict[str, Any]:
log_line = dict()
code: str
try:
log_line = e.to_dict()
except AttributeError as exc:
event_type = type(e).__name__
raise Exception( # TODO this may hang async threads
f"type {event_type} is not serializable. {str(exc)}"
# configure the json file handler
if json:
FILE_LOGGER = structlog.wrap_logger(
logger=logging.Logger('json file logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
]
)
# We get the code from the event object, so we don't need it in the data
if 'code' in log_line:
del log_line['code']
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.processors.JSONRenderer(),
)
handler = logging.handlers.WatchedFileHandler(filename=log_dest)
handler.setFormatter(formatter)
FILE_LOGGER.addHandler(handler)
event_dict = {
'type': 'log_line',
'log_version': LOG_VERSION,
'ts': get_ts_rfc3339(),
'pid': e.get_pid(),
'msg': e.message(),
'level': e.level_tag(),
'data': log_line,
'invocation_id': e.get_invocation_id(),
'thread_name': e.get_thread_name(),
'code': e.code
}
return event_dict
# translates an Event to a completely formatted text-based log line
# type hinting everything as strings so we don't get any unintentional string conversions via str()
def create_info_text_log_line(e: T_Event) -> str:
color_tag: str = '' if this.format_color else Style.RESET_ALL
ts: str = get_ts().strftime("%H:%M:%S")
scrubbed_msg: str = scrub_secrets(e.message(), env_secrets())
log_line: str = f"{color_tag}{ts} {scrubbed_msg}"
return log_line
def create_debug_text_log_line(e: T_Event) -> str:
log_line: str = ''
# Create a separator if this is the beginning of an invocation
if type(e) == MainReportVersion:
separator = 30 * '='
log_line = f'\n\n{separator} {get_ts()} | {get_invocation_id()} {separator}\n'
color_tag: str = '' if this.format_color else Style.RESET_ALL
ts: str = get_ts().strftime("%H:%M:%S.%f")
scrubbed_msg: str = scrub_secrets(e.message(), env_secrets())
level: str = e.level_tag() if len(e.level_tag()) == 5 else f"{e.level_tag()} "
thread = ''
if threading.current_thread().getName():
thread_name = threading.current_thread().getName()
thread_name = thread_name[:10]
thread_name = thread_name.ljust(10, ' ')
thread = f' [{thread_name}]:'
log_line = log_line + f"{color_tag}{ts} [{level}]{thread} {scrubbed_msg}"
return log_line
# translates an Event to a completely formatted json log line
def create_json_log_line(e: T_Event) -> Optional[str]:
if type(e) == EmptyLine:
return None # will not be sent to logger
# using preformatted ts string instead of formatting it here to be extra careful about timezone
values = event_to_serializable_dict(e)
raw_log_line = json.dumps(values, sort_keys=True)
return scrub_secrets(raw_log_line, env_secrets())
# calls create_info_text_log_line(), create_debug_text_log_line(), or
# create_json_log_line() according to logger config
def create_log_line(
e: T_Event,
file_output=False
) -> Optional[str]:
if this.format_json:
return create_json_log_line(e) # json output, both console and file
elif file_output is True or flags.DEBUG:
return create_debug_text_log_line(e) # default file output
# configure the plaintext file handler
else:
return create_info_text_log_line(e) # console output
# allows for reuse of this obnoxious if/else tree.
# do not use for exceptions, it doesn't pass along exc_info, stack_info, or extra
def send_to_logger(l: Union[Logger, logbook.Logger], level_tag: str, log_line: str):
if not log_line:
return
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(log_line)
elif level_tag == 'debug':
l.debug(log_line)
elif level_tag == 'info':
l.info(log_line)
elif level_tag == 'warn':
l.warning(log_line)
elif level_tag == 'error':
l.error(log_line)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
# TODO follow pattern from above ^^
FILE_LOGGER = structlog.wrap_logger(
logger=logging.Logger('plaintext file logger'),
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper("%H:%M:%S"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
]
)
def send_exc_to_logger(
l: Logger,
level_tag: str,
log_line: str,
exc_info=True,
stack_info=False,
extra=False
):
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'debug':
l.debug(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'info':
l.info(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'warn':
l.warning(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
elif level_tag == 'error':
l.error(
log_line,
exc_info=exc_info,
stack_info=stack_info,
extra=extra
)
else:
raise AssertionError(
f"While attempting to log {log_line}, encountered the unhandled level: {level_tag}"
formatter = structlog.stdlib.ProcessorFormatter(
processor=structlog.dev.ConsoleRenderer(colors=False),
)
handler = logging.handlers.WatchedFileHandler(filename=log_dest)
handler.setFormatter(formatter)
FILE_LOGGER.addHandler(handler)
# top-level method for accessing the new eventing system
@@ -317,78 +164,55 @@ def send_exc_to_logger(
# (i.e. - mutating the event history, printing to stdout, logging
# to files, etc.)
def fire_event(e: Event) -> None:
# skip logs when `--log-cache-events` is not passed
if isinstance(e, Cache) and not flags.LOG_CACHE_EVENTS:
return
# if and only if the event history deque will be completely filled by this event
# fire warning that old events are now being dropped
global EVENT_HISTORY
if len(EVENT_HISTORY) == (flags.EVENT_BUFFER_SIZE - 1):
EVENT_HISTORY.append(e)
fire_event(EventBufferFull())
else:
EVENT_HISTORY.append(e)
# backwards compatibility for plugins that require old logger (dbt-rpc)
if flags.ENABLE_LEGACY_LOGGER:
# using Event::message because the legacy logger didn't differentiate messages by
# destination
log_line = create_log_line(e)
if log_line:
send_to_logger(GLOBAL_LOGGER, e.level_tag(), log_line)
return # exit the function to avoid using the current logger as well
# always logs debug level regardless of user input
if not isinstance(e, NoFile):
log_line = create_log_line(e, file_output=True)
# doesn't send exceptions to exception logger
if log_line:
send_to_logger(FILE_LOG, level_tag=e.level_tag(), log_line=log_line)
if not isinstance(e, NoStdOut):
# explicitly checking the debug flag here so that potentially expensive-to-construct
# log messages are not constructed if debug messages are never shown.
if e.level_tag() == 'debug' and not flags.DEBUG:
return # eat the message in case it was one of the expensive ones
log_line = create_log_line(e)
if log_line:
if not isinstance(e, ShowException):
send_to_logger(STDOUT_LOG, level_tag=e.level_tag(), log_line=log_line)
EVENT_HISTORY.append(e)
level_tag = e.level_tag()
if isinstance(e, CliEventABC):
log_line = e.cli_msg()
if isinstance(e, ShowException):
event_dict = {
'exc_info': e.exc_info,
'stack_info': e.stack_info,
'extra': e.extra
}
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
STDOUT_LOGGER.debug(log_line, event_dict)
FILE_LOGGER.debug(log_line, event_dict)
elif level_tag == 'debug':
STDOUT_LOGGER.debug(log_line, event_dict)
FILE_LOGGER.debug(log_line, event_dict)
elif level_tag == 'info':
STDOUT_LOGGER.info(log_line, event_dict)
FILE_LOGGER.info(log_line, event_dict)
elif level_tag == 'warn':
STDOUT_LOGGER.warning(log_line, event_dict)
FILE_LOGGER.warning(log_line, event_dict)
elif level_tag == 'error':
STDOUT_LOGGER.error(log_line, event_dict)
FILE_LOGGER.error(log_line, event_dict)
else:
send_exc_to_logger(
STDOUT_LOG,
level_tag=e.level_tag(),
log_line=log_line,
exc_info=e.exc_info,
stack_info=e.stack_info,
extra=e.extra
raise AssertionError(
f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}"
)
# CliEventABC but not ShowException
else:
if level_tag == 'test':
# TODO after implementing #3977 send to new test level
STDOUT_LOGGER.debug(log_line)
FILE_LOGGER.debug(log_line)
elif level_tag == 'debug':
STDOUT_LOGGER.debug(log_line)
FILE_LOGGER.debug(log_line)
elif level_tag == 'info':
STDOUT_LOGGER.info(log_line)
FILE_LOGGER.info(log_line)
elif level_tag == 'warn':
STDOUT_LOGGER.warning(log_line)
FILE_LOGGER.warning(log_line)
elif level_tag == 'error':
STDOUT_LOGGER.error(log_line)
FILE_LOGGER.error(log_line)
else:
raise AssertionError(
f"Event type {type(e).__name__} has unhandled level: {e.level_tag()}"
)
def get_invocation_id() -> str:
global invocation_id
if invocation_id is None:
invocation_id = str(uuid.uuid4())
return invocation_id
def set_invocation_id() -> None:
# This is primarily for setting the invocation_id for separate
# commands in the dbt servers. It shouldn't be necessary for the CLI.
global invocation_id
invocation_id = str(uuid.uuid4())
# exactly one time stamp per concrete event
def get_ts() -> datetime:
ts = datetime.utcnow()
return ts
# preformatted time stamp
def get_ts_rfc3339() -> str:
ts = get_ts()
ts_rfc3339 = ts.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
return ts_rfc3339
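For reference, the strftime pattern above yields stamps like the following (example value):
```
get_ts_rfc3339()  # e.g. '2021-11-05T17:28:30.123456Z'
```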

View File

@@ -0,0 +1,7 @@
from dbt.events.types import Event
from typing import List
# the global history of events for this session
# TODO this is naive and the memory footprint is likely far too large.
EVENT_HISTORY: List[Event] = []

View File

@@ -1,56 +0,0 @@
from dbt.helper_types import Lazy
from mashumaro import DataClassDictMixin
from mashumaro.config import (
BaseConfig as MashBaseConfig
)
from mashumaro.types import SerializationStrategy
from typing import Dict, List
# The dbtClassMixin serialization class has a DateTime serialization strategy
# class. If a datetime ends up in an event class, we could use a similar class
# here to serialize it in our preferred format.
class ExceptionSerialization(SerializationStrategy):
def serialize(self, value):
out = str(value)
return out
def deserialize(self, value):
return (Exception(value))
class BaseExceptionSerialization(SerializationStrategy):
def serialize(self, value):
return str(value)
def deserialize(self, value):
return (BaseException(value))
# This is an explicit deserializer for the type Lazy[Dict[str, List[str]]]
# mashumaro does not support composing serialization strategies, so all
# future uses of Lazy will need to register a unique serialization class like this one.
class LazySerialization1(SerializationStrategy):
def serialize(self, value) -> Dict[str, List[str]]:
return value.force()
# we _can_ deserialize into a lazy value, but that defers running the deserialization
# function until the value is used, which can raise errors at very unexpected times.
# It's best practice to do strict deserialization unless you're in a very special case.
def deserialize(self, value):
raise Exception("Don't deserialize into a Lazy value. Try just using the value itself.")
# This class is the equivalent of dbtClassMixin that's used for serialization
# in other parts of the code. That class did extra things which we didn't want
# to use for events, so this class is a simpler version of dbtClassMixin.
class EventSerialization(DataClassDictMixin):
# This is where we register serialization strategies per type.
class Config(MashBaseConfig):
serialization_strategy = {
Exception: ExceptionSerialization(),
BaseException: ExceptionSerialization(),
Lazy[Dict[str, List[str]]]: LazySerialization1()
}

View File

@@ -1,81 +0,0 @@
from dataclasses import dataclass
from .types import (
InfoLevel,
DebugLevel,
WarnLevel,
ErrorLevel,
ShowException,
NoFile
)
# Keeping log messages for testing separate since they are used for debugging.
# Reuse the existing messages when adding logs to tests.
@dataclass
class IntegrationTestInfo(InfoLevel, NoFile):
msg: str
code: str = "T001"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestDebug(DebugLevel, NoFile):
msg: str
code: str = "T002"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestWarn(WarnLevel, NoFile):
msg: str
code: str = "T003"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestError(ErrorLevel, NoFile):
msg: str
code: str = "T004"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class IntegrationTestException(ShowException, ErrorLevel, NoFile):
msg: str
code: str = "T005"
def message(self) -> str:
return f"Integration Test: {self.msg}"
@dataclass
class UnitTestInfo(InfoLevel, NoFile):
msg: str
code: str = "T006"
def message(self) -> str:
return f"Unit Test: {self.msg}"
# since mypy doesn't run on every file we need to suggest to mypy that every
# class gets instantiated. But we don't actually want to run this code.
# making the conditional `if False` causes mypy to skip it as dead code so
# we need to skirt around that by computing something it doesn't check statically.
#
# TODO remove these lines once we run mypy everywhere.
if 1 == 0:
IntegrationTestInfo(msg='')
IntegrationTestDebug(msg='')
IntegrationTestWarn(msg='')
IntegrationTestError(msg='')
IntegrationTestException(msg='')
UnitTestInfo(msg='')

File diff suppressed because it is too large

View File

@@ -2,8 +2,7 @@ import builtins
import functools
from typing import NoReturn, Optional, Mapping, Any
from dbt.events.functions import fire_event, scrub_secrets, env_secrets
from dbt.events.types import GeneralWarningMsg, GeneralWarningException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
from dbt import flags
from dbt.ui import line_wrap_message, warning_tag
@@ -53,7 +52,7 @@ class RuntimeException(RuntimeError, Exception):
def __init__(self, msg, node=None):
self.stack = []
self.node = node
self.msg = scrub_secrets(msg, env_secrets())
self.msg = msg
def add_node(self, node=None):
if node is not None and node is not self.node:
@@ -243,15 +242,6 @@ class ValidationException(RuntimeException):
MESSAGE = "Validation Error"
class ParsingException(RuntimeException):
CODE = 10015
MESSAGE = "Parsing Error"
@property
def type(self):
return 'Parsing'
class JSONValidationException(ValidationException):
def __init__(self, typename, errors):
self.typename = typename
@@ -454,38 +444,12 @@ def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationException(msg, node)
def raise_parsing_error(msg, node=None) -> NoReturn:
raise ParsingException(msg, node)
def raise_database_error(msg, node=None) -> NoReturn:
raise DatabaseException(msg, node)
def raise_dependency_error(msg) -> NoReturn:
raise DependencyException(scrub_secrets(msg, env_secrets()))
def raise_git_cloning_error(error: CommandResultError) -> NoReturn:
error.cmd = scrub_secrets(str(error.cmd), env_secrets())
raise error
def raise_git_cloning_problem(repo) -> NoReturn:
repo = scrub_secrets(repo, env_secrets())
msg = '''\
Something went wrong while cloning {}
Check the debug logs for more information
'''
raise RuntimeException(msg.format(repo))
def disallow_secret_env_var(env_var_name) -> NoReturn:
"""Raise an error when a secret env var is referenced outside allowed
rendering contexts"""
msg = ("Secret env vars are allowed only in profiles.yml or packages.yml. "
"Found '{env_var_name}' referenced elsewhere.")
raise_parsing_error(msg.format(env_var_name=env_var_name))
raise DependencyException(msg)
def invalid_type_error(method_name, arg_name, got_value, expected_type,
@@ -703,9 +667,9 @@ def missing_materialization(model, adapter_type):
def bad_package_spec(repo, spec, error_message):
msg = "Error checking out spec='{}' for repo {}\n{}".format(spec, repo, error_message)
raise InternalException(scrub_secrets(msg, env_secrets()))
raise InternalException(
"Error checking out spec='{}' for repo {}\n{}".format(
spec, repo, error_message))
def raise_cache_inconsistent(message):
@@ -774,10 +738,6 @@ def system_error(operation_name):
class ConnectionException(Exception):
"""
There was a problem with the connection that returned a bad response,
timed out, or resulted in a file that is corrupt.
"""
pass
@@ -1014,16 +974,21 @@ def raise_duplicate_alias(
def warn_or_error(msg, node=None, log_fmt=None):
if flags.WARN_ERROR:
raise_compiler_error(scrub_secrets(msg, env_secrets()), node)
raise_compiler_error(msg, node)
else:
fire_event(GeneralWarningMsg(msg=msg, log_fmt=log_fmt))
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
def warn_or_raise(exc, log_fmt=None):
if flags.WARN_ERROR:
raise exc
else:
fire_event(GeneralWarningException(exc=exc, log_fmt=log_fmt))
msg = str(exc)
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
def warn(msg, node=None):

View File

@@ -17,6 +17,7 @@ PROFILES_DIR = os.path.expanduser(
STRICT_MODE = False # Only here for backwards compatibility
FULL_REFRESH = False # subcommand
STORE_FAILURES = False # subcommand
GREEDY = None # subcommand
# Global CLI commands
USE_EXPERIMENTAL_PARSER = None
@@ -32,9 +33,6 @@ FAIL_FAST = None
SEND_ANONYMOUS_USAGE_STATS = None
PRINTER_WIDTH = 80
WHICH = None
INDIRECT_SELECTION = None
LOG_CACHE_EVENTS = None
EVENT_BUFFER_SIZE = 100000
# Global CLI defaults. These flags are set from three places:
# CLI args, environment variables, and user_config (profiles.yml).
@@ -52,10 +50,7 @@ flag_defaults = {
"VERSION_CHECK": True,
"FAIL_FAST": False,
"SEND_ANONYMOUS_USAGE_STATS": True,
"PRINTER_WIDTH": 80,
"INDIRECT_SELECTION": 'eager',
"LOG_CACHE_EVENTS": False,
"EVENT_BUFFER_SIZE": 100000
"PRINTER_WIDTH": 80
}
@@ -86,7 +81,6 @@ def env_set_path(key: str) -> Optional[Path]:
MACRO_DEBUGGING = env_set_truthy('DBT_MACRO_DEBUGGING')
DEFER_MODE = env_set_truthy('DBT_DEFER_TO_STATE')
ARTIFACT_STATE_PATH = env_set_path('DBT_ARTIFACT_STATE_PATH')
ENABLE_LEGACY_LOGGER = env_set_truthy('DBT_ENABLE_LEGACY_LOGGER')
def _get_context():
@@ -101,14 +95,15 @@ MP_CONTEXT = _get_context()
def set_from_args(args, user_config):
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
USE_EXPERIMENTAL_PARSER, STATIC_PARSER, WRITE_JSON, PARTIAL_PARSE, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, INDIRECT_SELECTION, \
USE_COLORS, STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH, \
WHICH, LOG_CACHE_EVENTS, EVENT_BUFFER_SIZE
WHICH
STRICT_MODE = False # backwards compatibility
# cli args without user_config or env var option
FULL_REFRESH = getattr(args, 'full_refresh', FULL_REFRESH)
STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
GREEDY = getattr(args, 'greedy', GREEDY)
WHICH = getattr(args, 'which', WHICH)
# global cli flags with env var and user_config alternatives
@@ -125,9 +120,6 @@ def set_from_args(args, user_config):
FAIL_FAST = get_flag_value('FAIL_FAST', args, user_config)
SEND_ANONYMOUS_USAGE_STATS = get_flag_value('SEND_ANONYMOUS_USAGE_STATS', args, user_config)
PRINTER_WIDTH = get_flag_value('PRINTER_WIDTH', args, user_config)
INDIRECT_SELECTION = get_flag_value('INDIRECT_SELECTION', args, user_config)
LOG_CACHE_EVENTS = get_flag_value('LOG_CACHE_EVENTS', args, user_config)
EVENT_BUFFER_SIZE = get_flag_value('EVENT_BUFFER_SIZE', args, user_config)
def get_flag_value(flag, args, user_config):
@@ -140,13 +132,7 @@ def get_flag_value(flag, args, user_config):
if env_value is not None and env_value != '':
env_value = env_value.lower()
# non Boolean values
if flag in [
'LOG_FORMAT',
'PRINTER_WIDTH',
'PROFILES_DIR',
'INDIRECT_SELECTION',
'EVENT_BUFFER_SIZE'
]:
if flag in ['LOG_FORMAT', 'PRINTER_WIDTH', 'PROFILES_DIR']:
flag_value = env_value
else:
flag_value = env_set_bool(env_value)
@@ -154,7 +140,7 @@ def get_flag_value(flag, args, user_config):
flag_value = getattr(user_config, lc_flag)
else:
flag_value = flag_defaults[flag]
if flag in ['PRINTER_WIDTH', 'EVENT_BUFFER_SIZE']: # must be ints
if flag == 'PRINTER_WIDTH': # printer_width must be an int or it hangs
flag_value = int(flag_value)
if flag == 'PROFILES_DIR':
flag_value = os.path.abspath(flag_value)
@@ -177,7 +163,4 @@ def get_flag_dict():
"fail_fast": FAIL_FAST,
"send_anonymous_usage_stats": SEND_ANONYMOUS_USAGE_STATS,
"printer_width": PRINTER_WIDTH,
"indirect_selection": INDIRECT_SELECTION,
"log_cache_events": LOG_CACHE_EVENTS,
"event_buffer_size": EVENT_BUFFER_SIZE
}

View File

@@ -1 +0,0 @@
# Graph README

View File

@@ -16,18 +16,16 @@ from .selector_spec import (
SelectionIntersection,
SelectionDifference,
SelectionCriteria,
IndirectSelection
)
INTERSECTION_DELIMITER = ','
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*', 'metric:*']
DEFAULT_INCLUDES: List[str] = ['fqn:*', 'source:*', 'exposure:*']
DEFAULT_EXCLUDES: List[str] = []
def parse_union(
components: List[str], expect_exists: bool,
indirect_selection: IndirectSelection = IndirectSelection.Eager
components: List[str], expect_exists: bool, greedy: bool = False
) -> SelectionUnion:
# turn ['a b', 'c'] -> ['a', 'b', 'c']
raw_specs = itertools.chain.from_iterable(
@@ -38,7 +36,7 @@ def parse_union(
# ['a', 'b', 'c,d'] -> union('a', 'b', intersection('c', 'd'))
for raw_spec in raw_specs:
intersection_components: List[SelectionSpec] = [
SelectionCriteria.from_single_spec(part, indirect_selection=indirect_selection)
SelectionCriteria.from_single_spec(part, greedy=greedy)
for part in raw_spec.split(INTERSECTION_DELIMITER)
]
union_components.append(SelectionIntersection(
@@ -54,36 +52,21 @@ def parse_union(
def parse_union_from_default(
raw: Optional[List[str]], default: List[str],
indirect_selection: IndirectSelection = IndirectSelection.Eager
raw: Optional[List[str]], default: List[str], greedy: bool = False
) -> SelectionUnion:
components: List[str]
expect_exists: bool
if raw is None:
return parse_union(
components=default,
expect_exists=False,
indirect_selection=indirect_selection)
return parse_union(components=default, expect_exists=False, greedy=greedy)
else:
return parse_union(
components=raw,
expect_exists=True,
indirect_selection=indirect_selection)
return parse_union(components=raw, expect_exists=True, greedy=greedy)
def parse_difference(
include: Optional[List[str]], exclude: Optional[List[str]]
) -> SelectionDifference:
included = parse_union_from_default(
include,
DEFAULT_INCLUDES,
indirect_selection=IndirectSelection(flags.INDIRECT_SELECTION)
)
excluded = parse_union_from_default(
exclude,
DEFAULT_EXCLUDES,
indirect_selection=IndirectSelection.Eager)
included = parse_union_from_default(include, DEFAULT_INCLUDES, greedy=bool(flags.GREEDY))
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
return SelectionDifference(components=[included, excluded])
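As a rough mental model for what parse_difference assembles, the selector algebra reduces to set operations: union the nodes matched by the include specs, union those matched by the exclude specs, and take the difference. A toy sketch with invented data:

# Invented data, not dbt's classes: include/exclude compose as a set difference.
include_matches = [{"model_a", "model_b"}, {"model_b", "model_c"}]
exclude_matches = [{"model_c"}]

selected = set().union(*include_matches) - set().union(*exclude_matches)
assert selected == {"model_a", "model_b"}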
@@ -165,7 +148,7 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
union_def_parts = _get_list_dicts(definition, 'union')
include, exclude = _parse_include_exclude_subdefs(union_def_parts)
union = SelectionUnion(components=include)
union = SelectionUnion(components=include, greedy_warning=False)
if exclude is None:
union.raw = definition
@@ -173,7 +156,8 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
else:
return SelectionDifference(
components=[union, exclude],
raw=definition
raw=definition,
greedy_warning=False
)
@@ -182,7 +166,7 @@ def parse_intersection_definition(
) -> SelectionSpec:
intersection_def_parts = _get_list_dicts(definition, 'intersection')
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
intersection = SelectionIntersection(components=include)
intersection = SelectionIntersection(components=include, greedy_warning=False)
if exclude is None:
intersection.raw = definition
@@ -190,7 +174,8 @@ def parse_intersection_definition(
else:
return SelectionDifference(
components=[intersection, exclude],
raw=definition
raw=definition,
greedy_warning=False
)
@@ -224,7 +209,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
if diff_arg is None:
return base
else:
return SelectionDifference(components=[base, diff_arg])
return SelectionDifference(components=[base, diff_arg], greedy_warning=False)
def parse_from_definition(

View File

@@ -1,7 +1,6 @@
from typing import (
Set, Iterable, Iterator, Optional, NewType
)
from itertools import product
import networkx as nx # type: ignore
from dbt.exceptions import InternalException
@@ -31,13 +30,11 @@ class Graph:
"""Returns all nodes having a path to `node` in `graph`"""
if not self.graph.has_node(node):
raise InternalException(f'Node {node} not found in the graph!')
# This used to use nx.utils.reversed(self.graph), but that is deprecated,
# so we now use self.graph.reverse(copy=False) as recommended
G = self.graph.reverse(copy=False) if self.graph.is_directed() else self.graph
anc = nx.single_source_shortest_path_length(G=G,
source=node,
cutoff=max_depth)\
.keys()
with nx.utils.reversed(self.graph):
anc = nx.single_source_shortest_path_length(G=self.graph,
source=node,
cutoff=max_depth)\
.keys()
return anc - {node}
def descendants(
@@ -80,26 +77,17 @@ class Graph:
successors.update(self.graph.successors(node))
return successors
def get_subset_graph(self, selected: Iterable[UniqueId]) -> "Graph":
def get_subset_graph(self, selected: Iterable[UniqueId]) -> 'Graph':
"""Create and return a new graph that is a shallow copy of the graph,
but with only the nodes in include_nodes. Transitive edges across
removed nodes are preserved as explicit new edges.
"""
new_graph = nx.algorithms.transitive_closure(self.graph)
new_graph = self.graph.copy()
include_nodes = set(selected)
for node in self:
if node not in include_nodes:
source_nodes = [x for x, _ in new_graph.in_edges(node)]
target_nodes = [x for _, x in new_graph.out_edges(node)]
new_edges = product(source_nodes, target_nodes)
non_cyclic_new_edges = [
(source, target) for source, target in new_edges if source != target
] # removes cyclic refs
new_graph.add_edges_from(non_cyclic_new_edges)
new_graph.remove_node(node)
for node in include_nodes:
@@ -108,7 +96,6 @@ class Graph:
"Couldn't find model '{}' -- does it exist or is "
"it disabled?".format(node)
)
return Graph(new_graph)
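The rewiring step above can be seen in isolation: removing a node from a DAG while adding explicit edges from each of its predecessors to each of its successors keeps reachability intact. A minimal sketch using networkx directly:

# Minimal sketch: drop "b" from a -> b -> c but preserve the transitive edge a -> c.
import networkx as nx
from itertools import product

g = nx.DiGraph([("a", "b"), ("b", "c")])
node = "b"
sources = [s for s, _ in g.in_edges(node)]
targets = [t for _, t in g.out_edges(node)]
g.add_edges_from((s, t) for s, t in product(sources, targets) if s != t)
g.remove_node(node)

assert list(g.edges) == [("a", "c")]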
def subgraph(self, nodes: Iterable[UniqueId]) -> 'Graph':

View File

@@ -5,7 +5,7 @@ from queue import PriorityQueue
from typing import Dict, Set, List, Generator, Optional
from .graph import UniqueId
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure, ParsedMetric
from dbt.contracts.graph.parsed import ParsedSourceDefinition, ParsedExposure
from dbt.contracts.graph.compiled import GraphMemberNode
from dbt.contracts.graph.manifest import Manifest
from dbt.node_types import NodeType
@@ -47,8 +47,8 @@ class GraphQueue:
node = self.manifest.expect(node_id)
if node.resource_type != NodeType.Model:
return False
# must be a Model - tell mypy this won't be a Source or Exposure or Metric
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure, ParsedMetric))
# must be a Model - tell mypy this won't be a Source or Exposure
assert not isinstance(node, (ParsedSourceDefinition, ParsedExposure))
if node.is_ephemeral:
return False
return True
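The assert above exists purely for the type checker: after ruling out the non-model types, mypy narrows the union down to a model node. A self-contained illustration with invented types:

# Invented types: the assert narrows the Union so attribute access type-checks.
from typing import Union

class Model:
    is_ephemeral = False

class Source:
    pass

def include_in_cost(node: Union[Model, Source]) -> bool:
    assert not isinstance(node, Source)   # mypy: node is a Model from here on
    return not node.is_ephemeral

assert include_in_cost(Model()) is True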

View File

@@ -3,10 +3,12 @@ from typing import Set, List, Optional, Tuple
from .graph import Graph, UniqueId
from .queue import GraphQueue
from .selector_methods import MethodManager
from .selector_spec import SelectionCriteria, SelectionSpec, IndirectSelection
from .selector_spec import SelectionCriteria, SelectionSpec
from dbt.events.functions import fire_event
from dbt.events.types import SelectorReportInvalidSelector
from dbt.events.types import (
SelectorAlertUpto3UnusedNodes, SelectorAlertAllUnusedNodes, SelectorReportInvalidSelector
)
from dbt.node_types import NodeType
from dbt.exceptions import (
InternalException,
@@ -30,6 +32,12 @@ def alert_non_existence(raw_spec, nodes):
)
def alert_unused_nodes(raw_spec, node_names):
fire_event(SelectorAlertUpto3UnusedNodes(node_names=node_names))
if len(node_names) > 4:
fire_event(SelectorAlertAllUnusedNodes(node_names=node_names))
def can_select_indirectly(node):
"""If a node is not selected itself, but its parent(s) are, it may qualify
for indirect selection.
@@ -86,9 +94,8 @@ class NodeSelector(MethodManager):
try:
collected = self.select_included(nodes, spec)
except InvalidSelectorException:
valid_selectors = ", ".join(self.SELECTOR_METHODS)
fire_event(SelectorReportInvalidSelector(
valid_selectors=valid_selectors,
selector_methods=self.SELECTOR_METHODS,
spec_method=spec.method,
raw_spec=spec.raw
))
@@ -97,7 +104,7 @@ class NodeSelector(MethodManager):
neighbors = self.collect_specified_neighbors(spec, collected)
direct_nodes, indirect_nodes = self.expand_selection(
selected=(collected | neighbors),
indirect_selection=spec.indirect_selection
greedy=spec.greedy
)
return direct_nodes, indirect_nodes
@@ -169,8 +176,6 @@ class NodeSelector(MethodManager):
return source.config.enabled
elif unique_id in self.manifest.exposures:
return True
elif unique_id in self.manifest.metrics:
return True
node = self.manifest.nodes[unique_id]
return not node.empty and node.config.enabled
@@ -188,8 +193,6 @@ class NodeSelector(MethodManager):
node = self.manifest.sources[unique_id]
elif unique_id in self.manifest.exposures:
node = self.manifest.exposures[unique_id]
elif unique_id in self.manifest.metrics:
node = self.manifest.metrics[unique_id]
else:
raise InternalException(
f'Node {unique_id} not found in the manifest!'
@@ -205,21 +208,21 @@ class NodeSelector(MethodManager):
}
def expand_selection(
self, selected: Set[UniqueId],
indirect_selection: IndirectSelection = IndirectSelection.Eager
self, selected: Set[UniqueId], greedy: bool = False
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
# Test selection by default expands to include any implicitly/indirectly selected tests.
# `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, EAGER and CAUTIOUS.
# Test selection can expand to include an implicitly/indirectly selected test.
# In this way, `dbt test -m model_a` also includes tests that directly depend on `model_a`.
# Expansion has two modes, GREEDY and NOT GREEDY.
#
# EAGER mode: If ANY parent is selected, select the test.
# GREEDY mode: If ANY parent is selected, select the test. We use this for EXCLUSION.
#
# CAUTIOUS mode:
# NOT GREEDY mode:
# - If ALL parents are selected, select the test.
# - If ANY parent is missing, return it separately. We'll keep it around
# for later and see if its other parents show up.
# Users can opt out of inclusive EAGER mode by passing --indirect-selection cautious
# CLI argument or by specifying `indirect_selection: cautious` in a yaml selector
# We use this for INCLUSION.
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
# or by specifying `greedy: true` in a yaml selector
direct_nodes = set(selected)
indirect_nodes = set()
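The two modes differ only in the quantifier over a test's parents, which a toy example makes explicit (illustrative data, not dbt's node objects):

# eager: select the test if ANY parent is selected; cautious: only if ALL are.
selected = {"model_a"}
test_parents = {"t_shared": {"model_a", "model_b"}, "t_a_only": {"model_a"}}

eager = {t for t, parents in test_parents.items() if parents & selected}
cautious = {t for t, parents in test_parents.items() if parents <= selected}

assert eager == {"t_shared", "t_a_only"}
assert cautious == {"t_a_only"}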
@@ -229,10 +232,7 @@ class NodeSelector(MethodManager):
node = self.manifest.nodes[unique_id]
if can_select_indirectly(node):
# should we add it in directly?
if (
indirect_selection == IndirectSelection.Eager or
set(node.depends_on.nodes) <= set(selected)
):
if greedy or set(node.depends_on.nodes) <= set(selected):
direct_nodes.add(unique_id)
# if not:
else:
@@ -246,10 +246,6 @@ class NodeSelector(MethodManager):
# Check tests previously selected indirectly to see if ALL their
# parents are now present.
# performance: if identical, skip the processing below
if set(direct_nodes) == set(indirect_nodes):
return direct_nodes
selected = set(direct_nodes)
for unique_id in indirect_nodes:
@@ -273,6 +269,16 @@ class NodeSelector(MethodManager):
selected_nodes, indirect_only = self.select_nodes(spec)
filtered_nodes = self.filter_selection(selected_nodes)
if indirect_only:
filtered_unused_nodes = self.filter_selection(indirect_only)
if filtered_unused_nodes and spec.greedy_warning:
# log anything that didn't make the cut
unused_node_names = []
for unique_id in filtered_unused_nodes:
name = self.manifest.nodes[unique_id].name
unused_node_names.append(name)
alert_unused_nodes(spec, unused_node_names)
return filtered_nodes
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:

View File

@@ -1,7 +1,7 @@
import abc
from itertools import chain
from pathlib import Path
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional, Callable
from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional
from dbt.dataclass_schema import StrEnum
@@ -18,7 +18,6 @@ from dbt.contracts.graph.parsed import (
HasTestMetadata,
ParsedSingularTestNode,
ParsedExposure,
ParsedMetric,
ParsedGenericTestNode,
ParsedSourceDefinition,
)
@@ -46,7 +45,6 @@ class MethodName(StrEnum):
ResourceType = 'resource_type'
State = 'state'
Exposure = 'exposure'
Metric = 'metric'
Result = 'result'
@@ -74,7 +72,7 @@ def is_selected_node(fqn: List[str], node_selector: str):
return True
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure, ParsedMetric]
SelectorTarget = Union[ParsedSourceDefinition, ManifestNode, ParsedExposure]
class SelectorMethod(metaclass=abc.ABCMeta):
@@ -121,25 +119,13 @@ class SelectorMethod(metaclass=abc.ABCMeta):
continue
yield unique_id, exposure
def metric_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, ParsedMetric]]:
for key, metric in self.manifest.metrics.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, metric
def all_nodes(
self,
included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
yield from chain(self.parsed_nodes(included_nodes),
self.source_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
self.exposure_nodes(included_nodes))
def configurable_nodes(
self,
@@ -151,10 +137,9 @@ class SelectorMethod(metaclass=abc.ABCMeta):
def non_source_nodes(
self,
included_nodes: Set[UniqueId],
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode, ParsedMetric]]]:
) -> Iterator[Tuple[UniqueId, Union[ParsedExposure, ManifestNode]]]:
yield from chain(self.parsed_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes))
self.exposure_nodes(included_nodes))
@abc.abstractmethod
def search(
@@ -266,33 +251,6 @@ class ExposureSelectorMethod(SelectorMethod):
yield node
class MetricSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
) -> Iterator[UniqueId]:
parts = selector.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_name = parts[0]
elif len(parts) == 2:
target_package, target_name = parts
else:
msg = (
'Invalid metric selector value "{}". Metrics must be of '
'the form ${{metric_name}} or '
'${{metric_package.metric_name}}'
).format(selector)
raise RuntimeException(msg)
for node, real_node in self.metric_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_name not in (real_node.name, SELECTOR_GLOB):
continue
yield node
class PathSelectorMethod(SelectorMethod):
def search(
self, included_nodes: Set[UniqueId], selector: str
@@ -478,28 +436,42 @@ class StateSelectorMethod(SelectorMethod):
previous_macros = []
return self.recursively_check_macros_modified(node, previous_macros)
# TODO: check_modified_content and check_modified_macros seem a bit redundant
def check_modified_content(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
def check_modified(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
different_contents = not new.same_contents(old) # type: ignore
upstream_macro_change = self.check_macros_modified(new)
return different_contents or upstream_macro_change
def check_modified_body(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if hasattr(new, "same_body"):
return not new.same_body(old) # type: ignore
else:
return False
def check_modified_configs(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if hasattr(new, "same_config"):
return not new.same_config(old) # type: ignore
else:
return False
def check_modified_persisted_descriptions(
self, old: Optional[SelectorTarget], new: SelectorTarget
) -> bool:
if hasattr(new, "same_persisted_description"):
return not new.same_persisted_description(old) # type: ignore
else:
return False
def check_modified_relation(
self, old: Optional[SelectorTarget], new: SelectorTarget
) -> bool:
if hasattr(new, "same_database_representation"):
return not new.same_database_representation(old) # type: ignore
else:
return False
def check_modified_macros(self, _, new: SelectorTarget) -> bool:
return self.check_macros_modified(new)
@staticmethod
def check_modified_factory(
compare_method: str
) -> Callable[[Optional[SelectorTarget], SelectorTarget], bool]:
# get a function that compares two selector targets based on the compare method provided
def check_modified_things(old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if hasattr(new, compare_method):
# when the old node does not exist, or old and new differ per the compare method
return not old or not getattr(new, compare_method)(old) # type: ignore
else:
return False
return check_modified_things
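The factory collapses the per-property checkers above into one closure builder. A self-contained sketch of the same pattern, with an invented Node class standing in for manifest nodes:

# Invented Node class; mirrors the closure-factory pattern above.
def make_checker(compare_method):
    def check(old, new):
        if hasattr(new, compare_method):
            # "modified" when there is no old node, or old and new differ
            return not old or not getattr(new, compare_method)(old)
        return False
    return check

class Node:
    def __init__(self, body):
        self.body = body

    def same_body(self, other):
        return self.body == other.body

check_body = make_checker("same_body")
assert check_body(Node("a"), Node("b")) is True   # body changed
assert check_body(None, Node("a")) is True        # no old version counts as modified
assert check_body(Node("a"), Node("a")) is False  # unchanged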
def check_new(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
return old is None
@@ -513,21 +485,14 @@ class StateSelectorMethod(SelectorMethod):
state_checks = {
# it's new if there is no old version
'new':
lambda old, _: old is None,
'new': lambda old, _: old is None,
# use methods defined above to compare properties of old + new
'modified':
self.check_modified_content,
'modified.body':
self.check_modified_factory('same_body'),
'modified.configs':
self.check_modified_factory('same_config'),
'modified.persisted_descriptions':
self.check_modified_factory('same_persisted_description'),
'modified.relation':
self.check_modified_factory('same_database_representation'),
'modified.macros':
self.check_modified_macros,
'modified': self.check_modified,
'modified.body': self.check_modified_body,
'modified.configs': self.check_modified_configs,
'modified.persisted_descriptions': self.check_modified_persisted_descriptions,
'modified.relation': self.check_modified_relation,
'modified.macros': self.check_modified_macros,
}
if selector in state_checks:
checker = state_checks[selector]
@@ -547,8 +512,6 @@ class StateSelectorMethod(SelectorMethod):
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
if checker(previous_node, real_node):
yield node
@@ -581,10 +544,8 @@ class MethodManager:
MethodName.Config: ConfigSelectorMethod,
MethodName.TestName: TestNameSelectorMethod,
MethodName.TestType: TestTypeSelectorMethod,
MethodName.ResourceType: ResourceTypeSelectorMethod,
MethodName.State: StateSelectorMethod,
MethodName.Exposure: ExposureSelectorMethod,
MethodName.Metric: MetricSelectorMethod,
MethodName.Result: ResultSelectorMethod,
}

View File

@@ -2,7 +2,6 @@ import os
import re
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from dbt.dataclass_schema import StrEnum
from typing import (
Set, Iterator, List, Optional, Dict, Union, Any, Iterable, Tuple
@@ -23,11 +22,6 @@ RAW_SELECTOR_PATTERN = re.compile(
SELECTOR_METHOD_SEPARATOR = '.'
class IndirectSelection(StrEnum):
Eager = 'eager'
Cautious = 'cautious'
def _probably_path(value: str):
"""Decide if value is probably a path. Windows has two path separators, so
we should check both sep ('\\') and altsep ('/') there.
@@ -72,7 +66,8 @@ class SelectionCriteria:
parents_depth: Optional[int]
children: bool
children_depth: Optional[int]
indirect_selection: IndirectSelection = IndirectSelection.Eager
greedy: bool = False
greedy_warning: bool = False # do not raise warning for yaml selectors
def __post_init__(self):
if self.children and self.childrens_parents:
@@ -110,8 +105,7 @@ class SelectionCriteria:
@classmethod
def selection_criteria_from_dict(
cls, raw: Any, dct: Dict[str, Any],
indirect_selection: IndirectSelection = IndirectSelection.Eager
cls, raw: Any, dct: Dict[str, Any], greedy: bool = False
) -> 'SelectionCriteria':
if 'value' not in dct:
raise RuntimeException(
@@ -121,12 +115,6 @@ class SelectionCriteria:
parents_depth = _match_to_int(dct, 'parents_depth')
children_depth = _match_to_int(dct, 'children_depth')
# If this field is defined in the selector, it overrides the CLI flag
indirect_selection = IndirectSelection(
dct.get('indirect_selection', None) or indirect_selection
)
return cls(
raw=raw,
method=method_name,
@@ -137,7 +125,7 @@ class SelectionCriteria:
parents_depth=parents_depth,
children=bool(dct.get('children')),
children_depth=children_depth,
indirect_selection=indirect_selection
greedy=(greedy or bool(dct.get('greedy'))),
)
@classmethod
@@ -149,7 +137,7 @@ class SelectionCriteria:
method_name, method_arguments = cls.parse_method(dct)
meth_name = str(method_name)
if method_arguments:
meth_name += '.' + '.'.join(method_arguments)
meth_name = meth_name + '.' + '.'.join(method_arguments)
dct['method'] = meth_name
dct = {k: v for k, v in dct.items() if (v is not None and v != '')}
if 'childrens_parents' in dct:
@@ -158,23 +146,18 @@ class SelectionCriteria:
dct['parents'] = bool(dct.get('parents'))
if 'children' in dct:
dct['children'] = bool(dct.get('children'))
if 'greedy' in dct:
dct['greedy'] = bool(dct.get('greedy'))
return dct
@classmethod
def from_single_spec(
cls, raw: str,
indirect_selection: IndirectSelection = IndirectSelection.Eager
) -> 'SelectionCriteria':
def from_single_spec(cls, raw: str, greedy: bool = False) -> 'SelectionCriteria':
result = RAW_SELECTOR_PATTERN.match(raw)
if result is None:
# bad spec!
raise RuntimeException(f'Invalid selector spec "{raw}"')
return cls.selection_criteria_from_dict(
raw,
result.groupdict(),
indirect_selection=indirect_selection
)
return cls.selection_criteria_from_dict(raw, result.groupdict(), greedy=greedy)
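For intuition, the spec string is torn apart by a single regex into named groups (method, value, and the parents/children operators). A much-simplified stand-in, which ignores the `+`/`@` operators the real RAW_SELECTOR_PATTERN also handles:

# Simplified sketch: the real pattern also captures parents/children operators.
import re

SIMPLE_SELECTOR = re.compile(r'^(?:(?P<method>\w+):)?(?P<value>[\w.*]+)$')

m = SIMPLE_SELECTOR.match('tag:nightly')
assert m is not None
assert m.groupdict() == {'method': 'tag', 'value': 'nightly'}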
class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
@@ -182,10 +165,12 @@ class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
self,
components: Iterable[SelectionSpec],
expect_exists: bool = False,
greedy_warning: bool = True,
raw: Any = None,
):
self.components: List[SelectionSpec] = list(components)
self.expect_exists = expect_exists
self.greedy_warning = greedy_warning
self.raw = raw
def __iter__(self) -> Iterator[SelectionSpec]:

View File

@@ -1,8 +1,4 @@
# never name this package "types", or mypy will crash in ugly ways
# necessary for annotating constructors
from __future__ import annotations
from dataclasses import dataclass
from datetime import timedelta
from pathlib import Path
@@ -13,7 +9,6 @@ from dbt.dataclass_schema import (
)
from hologram import FieldEncoder, JsonDict
from mashumaro.types import SerializableType
from typing import Callable, cast, Generic, Optional, TypeVar
class Port(int, SerializableType):
@@ -98,35 +93,3 @@ dbtClassMixin.register_field_encoders({
FQNPath = Tuple[str, ...]
PathSet = AbstractSet[FQNPath]
T = TypeVar('T')
# A data type for representing lazily evaluated values.
#
# usage:
# x = Lazy.defer(lambda: expensive_fn())
# y = x.force()
#
# inspired by the purescript data type
# https://pursuit.purescript.org/packages/purescript-lazy/5.0.0/docs/Data.Lazy
@dataclass
class Lazy(Generic[T]):
_f: Callable[[], T]
memo: Optional[T] = None
# constructor for lazy values
@classmethod
def defer(cls, f: Callable[[], T]) -> Lazy[T]:
return Lazy(f)
# workaround for open mypy issue:
# https://github.com/python/mypy/issues/6910
def _typed_eval_f(self) -> T:
return cast(Callable[[], T], getattr(self, "_f"))()
# evaluates the function if the value has not been memoized already
def force(self) -> T:
if self.memo is None:
self.memo = self._typed_eval_f()
return self.memo
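Assuming the Lazy class above, a quick check of its memoization behavior:

# The deferred thunk runs once; later force() calls return the memoized value.
calls = []
x = Lazy.defer(lambda: calls.append(1) or 42)  # append returns None, so the lambda yields 42
assert x.force() == 42
assert x.force() == 42
assert calls == [1]  # evaluated exactly once

One wrinkle in the design: because the memo sentinel is None, a computation that legitimately returns None is re-evaluated on every force().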

View File

@@ -1 +0,0 @@
# Include README

View File

@@ -2,6 +2,5 @@ config-version: 2
name: dbt
version: 1.0
docs-paths: ["docs"]
docs-paths: ['docs']
macro-paths: ["macros"]
test-paths: ["tests"]

View File

@@ -26,7 +26,7 @@ On model pages, you'll see the immediate parents and children of the model you'r
button at the top-right of this lineage pane, you'll be able to see all of the models that are used to build,
or are built from, the model you're exploring.
Once expanded, you'll be able to use the `--select` and `--exclude` model selection syntax to filter the
Once expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).
Note that you can also right-click on models to interactively filter and explore the graph.
@@ -35,9 +35,9 @@ Note that you can also right-click on models to interactively filter and explore
### More information
- [What is dbt](https://docs.getdbt.com/docs/introduction)?
- [What is dbt](https://docs.getdbt.com/docs/overview)?
- Read the [dbt viewpoint](https://docs.getdbt.com/docs/viewpoint)
- [Installation](https://docs.getdbt.com/docs/installation)
- Join the [dbt Community](https://www.getdbt.com/community/) for questions and discussion
- Join the [chat](https://community.getdbt.com/) on Slack for live questions and support.
{% enddocs %}

View File

@@ -1,89 +0,0 @@
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{# helper for adapter-specific implementations of get_columns_in_relation #}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if add_columns and remove_columns }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}

View File

@@ -0,0 +1,344 @@
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
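The macro relies on a standard introspection trick: run the query with a false predicate and limit 0, so the database returns column metadata without scanning any rows. The same idea, shown with sqlite3 purely for illustration:

# "where false limit 0": fetch column names without fetching rows.
import sqlite3

conn = sqlite3.connect(":memory:")
cur = conn.execute(
    "select * from (select 1 as id, 'x' as name) where 0 limit 0"
)
assert cur.fetchall() == []                               # no rows came back
assert [d[0] for d in cur.description] == ["id", "name"]  # but we got the columns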
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}
{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, sql) }}
{%- endmacro %}
{% macro default__create_table_as(temporary, relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
as (
{{ sql }}
);
{% endmacro %}
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}
{% macro create_view_as(relation, sql) -%}
{{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}
{%- endmacro %}
{% macro default__create_view_as(relation, sql) -%}
{%- set sql_header = config.get('sql_header', none) -%}
{{ sql_header if sql_header is not none }}
create view {{ relation }} as (
{{ sql }}
);
{% endmacro %}
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
{% macro set_sql_header(config) -%}
{{ config.set('sql_header', caller()) }}
{%- endmacro %}
{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}
{% endmacro %}
{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
{% if add_columns is none %}
{% set add_columns = [] %}
{% endif %}
{% if remove_columns is none %}
{% set remove_columns = [] %}
{% endif %}
{% set sql -%}
alter {{ relation.type }} {{ relation }}
{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}{{ ',' if remove_columns | length > 0 }}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{%- endset -%}
{% do run_query(sql) %}
{% endmacro %}

View File

@@ -1,26 +0,0 @@
{% macro current_timestamp() -%}
{{ adapter.dispatch('current_timestamp', 'dbt')() }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field, filter) %}
{{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field, filter) %}
{% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% if filter %}
where {{ filter }}
{% endif %}
{% endcall %}
{{ return(load_result('collect_freshness').table) }}
{% endmacro %}

View File

@@ -1,23 +0,0 @@
{% macro get_create_index_sql(relation, index_dict) -%}
{{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}
{% endmacro %}
{% macro default__get_create_index_sql(relation, index_dict) -%}
{% do return(None) %}
{% endmacro %}
{% macro create_indexes(relation) -%}
{{ adapter.dispatch('create_indexes', 'dbt')(relation) }}
{%- endmacro %}
{% macro default__create_indexes(relation) -%}
{%- set _indexes = config.get('indexes', default=[]) -%}
{% for _index_dict in _indexes %}
{% set create_index_sql = get_create_index_sql(relation, _index_dict) %}
{% if create_index_sql %}
{% do run_query(create_index_sql) %}
{% endif %}
{% endfor %}
{% endmacro %}

View File

@@ -1,65 +0,0 @@
{% macro get_catalog(information_schema, schemas) -%}
{{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schema, schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ database }}.INFORMATION_SCHEMA
{%- else -%}
INFORMATION_SCHEMA
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% set sql %}
select distinct schema_name
from {{ information_schema_name(database) }}.SCHEMATA
where catalog_name ilike '{{ database }}'
{% endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% set sql -%}
select count(*)
from {{ information_schema.replace(information_schema_view='SCHEMATA') }}
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endset %}
{{ return(run_query(sql)) }}
{% endmacro %}
{% macro list_relations_without_caching(schema_relation) %}
{{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(schema_relation) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}

View File

@@ -1,33 +0,0 @@
{% macro alter_column_comment(relation, column_dict) -%}
{{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}
{% endmacro %}
{% macro default__alter_column_comment(relation, column_dict) -%}
{{ exceptions.raise_not_implemented(
'alter_column_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_relation_comment(relation, relation_comment) -%}
{{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}
{% endmacro %}
{% macro default__alter_relation_comment(relation, relation_comment) -%}
{{ exceptions.raise_not_implemented(
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}
{{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}
{% endmacro %}
{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_relation and config.persist_relation_docs() and model.description %}
{% do run_query(alter_relation_comment(relation, model.description)) %}
{% endif %}
{% if for_columns and config.persist_column_docs() and model.columns %}
{% do run_query(alter_column_comment(relation, model.columns)) %}
{% endif %}
{% endmacro %}

View File

@@ -1,84 +0,0 @@
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier}) -%}
{% do return(tmp_relation) %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro get_or_create_relation(database, schema, identifier, type) -%}
{{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }}
{% endmacro %}
{% macro default__get_or_create_relation(database, schema, identifier, type) %}
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
{% if target_relation %}
{% do return([true, target_relation]) %}
{% endif %}
{%- set new_relation = api.Relation.create(
database=database,
schema=schema,
identifier=identifier,
type=type
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}
{# a user-friendly interface into adapter.get_relation #}
{% macro load_relation(relation) %}
{% do return(adapter.get_relation(
database=relation.database,
schema=relation.schema,
identifier=relation.identifier
)) -%}
{% endmacro %}
{# not used much, here for backwards compatibility #}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}

View File

@@ -1,20 +0,0 @@
{% macro create_schema(relation) -%}
{{ adapter.dispatch('create_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__create_schema(relation) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{ relation.without_identifier() }}
{% endcall %}
{% endmacro %}
{% macro drop_schema(relation) -%}
{{ adapter.dispatch('drop_schema', 'dbt')(relation) }}
{% endmacro %}
{% macro default__drop_schema(relation) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{ relation.without_identifier() }} cascade
{% endcall %}
{% endmacro %}

View File

@@ -15,7 +15,6 @@
{%- endif -%}
{%- endmacro %}
{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}
{%- set sql = caller() -%}
@@ -29,13 +28,3 @@
{%- endif -%}
{%- endmacro %}
{# a user-friendly interface into statements #}
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}

View File

@@ -1,3 +1,4 @@
{% macro convert_datetime(date_str, date_fmt) %}
{% set error_msg -%}
@@ -9,7 +10,6 @@
{% endmacro %}
{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt="%Y%m%d", out_fmt="%Y%m%d") %}
{% set end_date_str = start_date_str if end_date_str is none else end_date_str %}
@@ -38,7 +38,6 @@
{{ return(date_list) }}
{% endmacro %}
{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}
{% set partition_range = (raw_partition_date | string).split(",") %}
@@ -55,7 +54,6 @@
{{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}
{% endmacro %}
{% macro py_current_timestring() %}
{% set dt = modules.datetime.datetime.now() %}
{% do return(dt.strftime("%Y%m%d%H%M%S%f")) %}

Some files were not shown because too many files have changed in this diff.