Mirror of https://github.com/dbt-labs/dbt-core (synced 2025-12-18 22:51:27 +00:00)

Compare commits: db-setup-w...jerco/pull (68 commits)
| SHA1 |
|---|
| 249444c06f |
| 86ad3cf166 |
| a629711f81 |
| 61f18af782 |
| 7e8e43c552 |
| 6ceed5ce55 |
| b145e0524c |
| b610b58d2e |
| 1ba9f89c54 |
| c0ae76690d |
| 08b762cbf2 |
| 052b14cf79 |
| 2d938e511d |
| 571beb13d9 |
| 2fc8e5e0b6 |
| 5ab07273ba |
| 19c9e5bfdf |
| 60794367a5 |
| ea07729bbf |
| c4370773f6 |
| fda17b456e |
| bc3e1a0a71 |
| a06988706c |
| ce73124bbf |
| 352c62f3c3 |
| 81a51d3942 |
| 64fc3a39a7 |
| e5b6f4f293 |
| d26e63ed9a |
| f4f5d31959 |
| e7e12075b9 |
| 74dda5aa19 |
| 092e96ce70 |
| 18102027ba |
| f80825d63e |
| 9316e47b77 |
| f99cf1218a |
| 5871915ce9 |
| 5ce290043f |
| 080d27321b |
| 1d0936bd14 |
| 706b8ca9df |
| 7dc491b7ba |
| 779c789a64 |
| 409b4ba109 |
| 59d131d3ac |
| 6563d09ba7 |
| 05dea18b62 |
| d7177c7d89 |
| 35f0fea804 |
| 8953c7c533 |
| 76c59a5545 |
| 237048c7ac |
| 30ff395b7b |
| 5c0a31b829 |
| 243bc3d41d |
| 67b594a950 |
| 2493c21649 |
| d3826e670f |
| 4b5b1696b7 |
| abb59ef14f |
| 3b7c2816b9 |
| 484517416f |
| 39447055d3 |
| 95cca277c9 |
| 96083dcaf5 |
| 75b4cf691b |
| 7c9171b00b |
```diff
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.21.0b1
+current_version = 0.21.0rc1
 parse = (?P<major>\d+)
     \.(?P<minor>\d+)
     \.(?P<patch>\d+)
@@ -34,17 +34,9 @@ first_value = 1
 
 [bumpversion:file:plugins/postgres/setup.py]
 
-[bumpversion:file:plugins/redshift/setup.py]
-
-[bumpversion:file:plugins/snowflake/setup.py]
-
 [bumpversion:file:plugins/bigquery/setup.py]
 
 [bumpversion:file:plugins/postgres/dbt/adapters/postgres/__version__.py]
 
-[bumpversion:file:plugins/redshift/dbt/adapters/redshift/__version__.py]
-
-[bumpversion:file:plugins/snowflake/dbt/adapters/snowflake/__version__.py]
-
 [bumpversion:file:plugins/bigquery/dbt/adapters/bigquery/__version__.py]
 
```
.github/dependabot.yml (vendored, 10 changed lines)

```diff
@@ -21,16 +21,6 @@ updates:
     schedule:
       interval: "daily"
     rebase-strategy: "disabled"
-  - package-ecosystem: "pip"
-    directory: "/plugins/redshift"
-    schedule:
-      interval: "daily"
-    rebase-strategy: "disabled"
-  - package-ecosystem: "pip"
-    directory: "/plugins/snowflake"
-    schedule:
-      interval: "daily"
-    rebase-strategy: "disabled"
 
   # docker dependencies
   - package-ecosystem: "docker"
```
.github/scripts/integration-test-matrix.js (vendored, 2 changed lines)

```diff
@@ -1,7 +1,7 @@
 module.exports = ({ context }) => {
   const defaultPythonVersion = "3.8";
   const supportedPythonVersions = ["3.6", "3.7", "3.8", "3.9"];
-  const supportedAdapters = ["snowflake", "postgres", "bigquery", "redshift"];
+  const supportedAdapters = ["postgres", "bigquery"];
 
   // if PR, generate matrix based on files changed and PR labels
   if (context.eventName.includes("pull_request")) {
```
.github/workflows/integration.yml (vendored, 34 changed lines)

```diff
@@ -91,16 +91,9 @@ jobs:
             - 'core/**'
             - 'plugins/postgres/**'
             - 'dev-requirements.txt'
-          snowflake:
-            - 'core/**'
-            - 'plugins/snowflake/**'
           bigquery:
             - 'core/**'
             - 'plugins/bigquery/**'
-          redshift:
-            - 'core/**'
-            - 'plugins/redshift/**'
-            - 'plugins/postgres/**'
 
       - name: Generate integration test matrix
         id: generate-matrix
@@ -191,33 +184,6 @@ jobs:
         if: matrix.adapter == 'postgres'
         run: tox
 
-      - name: Run tox (redshift)
-        if: matrix.adapter == 'redshift'
-        env:
-          REDSHIFT_TEST_DBNAME: ${{ secrets.REDSHIFT_TEST_DBNAME }}
-          REDSHIFT_TEST_PASS: ${{ secrets.REDSHIFT_TEST_PASS }}
-          REDSHIFT_TEST_USER: ${{ secrets.REDSHIFT_TEST_USER }}
-          REDSHIFT_TEST_PORT: ${{ secrets.REDSHIFT_TEST_PORT }}
-          REDSHIFT_TEST_HOST: ${{ secrets.REDSHIFT_TEST_HOST }}
-        run: tox
-
-      - name: Run tox (snowflake)
-        if: matrix.adapter == 'snowflake'
-        env:
-          SNOWFLAKE_TEST_ACCOUNT: ${{ secrets.SNOWFLAKE_TEST_ACCOUNT }}
-          SNOWFLAKE_TEST_PASSWORD: ${{ secrets.SNOWFLAKE_TEST_PASSWORD }}
-          SNOWFLAKE_TEST_USER: ${{ secrets.SNOWFLAKE_TEST_USER }}
-          SNOWFLAKE_TEST_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_WAREHOUSE }}
-          SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN: ${{ secrets.SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN }}
-          SNOWFLAKE_TEST_OAUTH_CLIENT_ID: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_ID }}
-          SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET }}
-          SNOWFLAKE_TEST_ALT_DATABASE: ${{ secrets.SNOWFLAKE_TEST_ALT_DATABASE }}
-          SNOWFLAKE_TEST_ALT_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_ALT_WAREHOUSE }}
-          SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
-          SNOWFLAKE_TEST_QUOTED_DATABASE: ${{ secrets.SNOWFLAKE_TEST_QUOTED_DATABASE }}
-          SNOWFLAKE_TEST_ROLE: ${{ secrets.SNOWFLAKE_TEST_ROLE }}
-        run: tox
-
       - name: Run tox (bigquery)
         if: matrix.adapter == 'bigquery'
         env:
```
.github/workflows/performance.yml (vendored, 6 changed lines)

```diff
@@ -164,11 +164,13 @@ jobs:
           name: runner
       - name: change permissions
         run: chmod +x ./runner
+      - name: make results directory
+        run: mkdir ./final-output/
       - name: run calculation
-        run: ./runner calculate -r ./
+        run: ./runner calculate -r ./ -o ./final-output/
       # always attempt to upload the results even if there were regressions found
       - uses: actions/upload-artifact@v2
         if: ${{ always() }}
         with:
           name: final-calculations
-          path: ./final_calculations.json
+          path: ./final-output/*
```
```diff
@@ -26,7 +26,7 @@ This is the docs website code. It comes from the dbt-docs repository, and is gen
 
 ## Adapters
 
-dbt uses an adapter-plugin pattern to extend support to different databases, warehouses, query engines, etc. The four core adapters that are in the main repository, contained within the [`plugins`](plugins) subdirectory, are: Postgres Redshift, Snowflake and BigQuery. Other warehouses use adapter plugins defined in separate repositories (e.g. [dbt-spark](https://github.com/dbt-labs/dbt-spark), [dbt-presto](https://github.com/dbt-labs/dbt-presto)).
+dbt uses an adapter-plugin pattern to extend support to different databases, warehouses, query engines, etc. For testing and development purposes, the dbt-postgres plugin lives alongside the dbt-core codebase, in the [`plugins`](plugins) subdirectory. Like other adapter plugins, it is a self-contained codebase and package that builds on top of dbt-core.
 
 Each adapter is a mix of python, Jinja2, and SQL. The adapter code also makes heavy use of Jinja2 to wrap modular chunks of SQL functionality, define default implementations, and allow plugins to override it.
 
```
CHANGELOG.md (48 changed lines)

```
@@ -1,12 +1,47 @@
## dbt 1.0.0 (Release TBD)

### Features
- Normalize global CLI arguments/flags ([#2990](https://github.com/dbt-labs/dbt/issues/2990), [#3839](https://github.com/dbt-labs/dbt/pull/3839))

### Fixes

### Under the hood

- Enact deprecation for `materialization-return` and replace deprecation warning with an exception. ([#3896](https://github.com/dbt-labs/dbt/issues/3896))
- Build catalog for only relational, non-ephemeral nodes in the graph ([#3920](https://github.com/dbt-labs/dbt/issues/3920))
- Enact deprecation to remove the `release` arg from the `execute_macro` method. ([#3900](https://github.com/dbt-labs/dbt/issues/3900))
- Enact deprecation for default quoting to be True. Override for the `dbt-snowflake` adapter so it stays `False`. ([#3898](https://github.com/dbt-labs/dbt/issues/3898))

Contributors:

- [@dave-connors-3](https://github.com/dave-connors-3) ([#3920](https://github.com/dbt-labs/dbt/issues/3920))


## dbt 0.21.0 (Release TBD)

### Fixes
- Fix batching for large seeds on Snowflake ([#3941](https://github.com/dbt-labs/dbt/issues/3941), [#3942](https://github.com/dbt-labs/dbt/pull/3942))
- Avoid infinite recursion in `state:modified.macros` check ([#3904](https://github.com/dbt-labs/dbt/issues/3904), [#3957](https://github.com/dbt-labs/dbt/pull/3957))

### Under the hood
- Bump artifact schema versions for 0.21.0 ([#3945](https://github.com/dbt-labs/dbt/pull/3945))

## dbt 0.21.0rc1 (September 20, 2021)

### Features

- Experimental parser now detects macro overrides of ref, source, and config builtins. ([#3581](https://github.com/dbt-labs/dbt/issues/3866), [#3582](https://github.com/dbt-labs/dbt/pull/3877))
- Add connect_timeout profile configuration for Postgres and Redshift adapters. ([#3581](https://github.com/dbt-labs/dbt/issues/3581), [#3582](https://github.com/dbt-labs/dbt/pull/3582))
- Enhance BigQuery copy materialization ([#3570](https://github.com/dbt-labs/dbt/issues/3570), [#3606](https://github.com/dbt-labs/dbt/pull/3606)):
  - to simplify config (default usage of `copy_materialization='table'` if is is not found in global or local config)
  - to let copy several source tables into single target table at a time. ([Google doc reference](https://cloud.google.com/bigquery/docs/managing-tables#copying_multiple_source_tables))
- Customize ls task JSON output by adding new flag `--output-keys` ([#3778](https://github.com/dbt-labs/dbt/issues/3778), [#3395](https://github.com/dbt-labs/dbt/issues/3395))
- Add support for execution project on BigQuery through profile configuration ([#3707](https://github.com/dbt-labs/dbt/issues/3707), [#3708](https://github.com/dbt-labs/dbt/issues/3708))
- Skip downstream nodes during the `build` task when a test fails. ([#3597](https://github.com/dbt-labs/dbt/issues/3597), [#3792](https://github.com/dbt-labs/dbt/pull/3792))
- Added default field in the `selectors.yml` to allow user to define default selector ([#3448](https://github.com/dbt-labs/dbt/issues/3448), [#3875](https://github.com/dbt-labs/dbt/issues/3875), [#3892](https://github.com/dbt-labs/dbt/issues/3892))
- Added timing and thread information to sources.json artifact ([#3804](https://github.com/dbt-labs/dbt/issues/3804), [#3894](https://github.com/dbt-labs/dbt/pull/3894))
- Update cli and rpc flags for the `build` task to align with other commands (`--resource-type`, `--store-failures`) ([#3596](https://github.com/dbt-labs/dbt/issues/3596), [#3884](https://github.com/dbt-labs/dbt/pull/3884))
- Log tests that are not indirectly selected. Add `--greedy` flag to `test`, `list`, `build` and `greedy` property in yaml selectors ([#3723](https://github.com/dbt-labs/dbt/pull/3723), [#3833](https://github.com/dbt-labs/dbt/pull/3833))

### Fixes

@@ -15,19 +50,18 @@
- Fix issue when running the `deps` task after the `list` task in the RPC server ([#3846](https://github.com/dbt-labs/dbt/issues/3846), [#3848](https://github.com/dbt-labs/dbt/pull/3848), [#3850](https://github.com/dbt-labs/dbt/pull/3850))
- Fix bug with initializing a dataclass that inherits from `typing.Protocol`, specifically for `dbt.config.profile.Profile` ([#3843](https://github.com/dbt-labs/dbt/issues/3843), [#3855](https://github.com/dbt-labs/dbt/pull/3855))
- Introduce a macro, `get_where_subquery`, for tests that use `where` config. Alias filtering subquery as `dbt_subquery` instead of resource identifier ([#3857](https://github.com/dbt-labs/dbt/issues/3857), [#3859](https://github.com/dbt-labs/dbt/issues/3859))

### Fixes

- Use group by column_name in accepted_values test for compatibility with most database engines ([#3905](https://github.com/dbt-labs/dbt/issues/3905), [#3906](https://github.com/dbt-labs/dbt/pull/3906))
- Separated table vs view configuration for BigQuery since some configuration is not possible to set for tables vs views. ([#3682](https://github.com/dbt-labs/dbt/issues/3682), [#3691](https://github.com/dbt-labs/dbt/issues/3682))

### Under the hood

- Use GitHub Actions for CI ([#3688](https://github.com/dbt-labs/dbt/issues/3688), [#3669](https://github.com/dbt-labs/dbt/pull/3669))
- Better dbt hub registry packages version logging that prompts the user for upgrades to relevant packages ([#3560](https://github.com/dbt-labs/dbt/issues/3560), [#3763](https://github.com/dbt-labs/dbt/issues/3763), [#3759](https://github.com/dbt-labs/dbt/pull/3759))
- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. ([#3622](https://github.com/fishtown-analytics/dbt/issues/3622), [#3623](https://github.com/fishtown-analytics/dbt/pull/3623))
- Allow the default seed macro's SQL parameter, `%s`, to be replaced by dispatching a new macro, `get_binding_char()`. This enables adapters with parameter marker characters such as `?` to not have to override `basic_load_csv_rows`. ([#3622](https://github.com/dbt-labs/dbt/issues/3622), [#3623](https://github.com/dbt-labs/dbt/pull/3623))
- Alert users on package rename ([hub.getdbt.com#180](https://github.com/dbt-labs/hub.getdbt.com/issues/810), [#3825](https://github.com/dbt-labs/dbt/pull/3825))
- Add `adapter_unique_id` to invocation context in anonymous usage tracking, to better understand dbt adoption ([#3713](https://github.com/dbt-labs/dbt/issues/3713), [#3796](https://github.com/dbt-labs/dbt/issues/3796))
- Specify `macro_namespace = 'dbt'` for all dispatched macros in the global project, making it possible to dispatch to macro implementations defined in packages. Dispatch `generate_schema_name` and `generate_alias_name` ([#3456](https://github.com/dbt-labs/dbt/issues/3456), [#3851](https://github.com/dbt-labs/dbt/issues/3851))
- Retry transient GitHub failures during download ([#3729](https://github.com/dbt-labs/dbt/pull/3729))

Contributors:

@@ -36,10 +70,13 @@ Contributors:
- [@dbrtly](https://github.com/dbrtly) ([#3834](https://github.com/dbt-labs/dbt/pull/3834))
- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/dbt-labs/dbt/pull/3623)
- [@JasonGluck](https://github.com/JasonGluck) ([#3582](https://github.com/dbt-labs/dbt/pull/3582))
- [@joellabes](https://github.com/joellabes) ([#3669](https://github.com/dbt-labs/dbt/pull/3669))
- [@joellabes](https://github.com/joellabes) ([#3669](https://github.com/dbt-labs/dbt/pull/3669), [#3833](https://github.com/dbt-labs/dbt/pull/3833))
- [@juma-adoreme](https://github.com/juma-adoreme) ([#3838](https://github.com/dbt-labs/dbt/pull/3838))
- [@annafil](https://github.com/annafil) ([#3825](https://github.com/dbt-labs/dbt/pull/3825))
- [@AndreasTA-AW](https://github.com/AndreasTA-AW) ([#3691](https://github.com/dbt-labs/dbt/pull/3691))
- [@Kayrnt](https://github.com/Kayrnt) ([3707](https://github.com/dbt-labs/dbt/pull/3707))
- [@TeddyCr](https://github.com/TeddyCr) ([#3448](https://github.com/dbt-labs/dbt/pull/3865))
- [@sdebruyn](https://github.com/sdebruyn) ([#3906](https://github.com/dbt-labs/dbt/pull/3906))

## dbt 0.21.0b2 (August 19, 2021)

@@ -106,6 +143,7 @@ Contributors:

- Better error handling for BigQuery job labels that are too long. ([#3612](https://github.com/dbt-labs/dbt/pull/3612), [#3703](https://github.com/dbt-labs/dbt/pull/3703))
- Get more information on partial parsing version mismatches ([#3757](https://github.com/dbt-labs/dbt/issues/3757), [#3758](https://github.com/dbt-labs/dbt/pull/3758))
- Switch to full reparse on partial parsing exceptions. Log and report exception information. ([#3725](https://github.com/dbt-labs/dbt/issues/3725), [#3733](https://github.com/dbt-labs/dbt/pull/3733))

### Fixes
```
````
@@ -68,7 +68,7 @@ The `dbt` maintainers use labels to categorize open issues. Some labels indicate

- **Trunks** are where active development of the next release takes place. There is one trunk named `develop` at the time of writing this, and will be the default branch of the repository.
- **Release Branches** track a specific, not yet complete release of `dbt`. Each minor version release has a corresponding release branch. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of `dbt`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk brnach or a specific release branch.
- **Feature Branches** track individual features and fixes. On completion they should be merged into the trunk branch or a specific release branch.

## Getting the code

@@ -135,7 +135,7 @@ brew install postgresql

### Installation

First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Next, install `dbt` (and its dependencies) with:
First make sure that you set up your `virtualenv` as described in [Setting up an environment](#setting-up-an-environment). Also ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt` (and its dependencies) with:

```sh
make dev
@@ -170,6 +170,8 @@ docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```

Note that you may need to run the previous command twice as it does not currently wait for the database to be running before attempting to run commands against it. This will be fixed with [#3876](https://github.com/dbt-labs/dbt/issues/3876).

`dbt` uses test credentials specified in a `test.env` file in the root of the repository for non-Postgres databases. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against `dbt`. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials. This step is only required to use non-Postgres databases.

```
````
Makefile (16 changed lines)

```diff
@@ -44,22 +44,6 @@ integration-postgres: .env ## Runs postgres integration tests with py38.
 integration-postgres-fail-fast: .env ## Runs postgres integration tests with py38 in "fail fast" mode.
 	$(DOCKER_CMD) tox -e py38-postgres -- -x -nauto
 
-.PHONY: integration-redshift
-integration-redshift: .env ## Runs redshift integration tests with py38.
-	$(DOCKER_CMD) tox -e py38-redshift -- -nauto
-
-.PHONY: integration-redshift-fail-fast
-integration-redshift-fail-fast: .env ## Runs redshift integration tests with py38 in "fail fast" mode.
-	$(DOCKER_CMD) tox -e py38-redshift -- -x -nauto
-
-.PHONY: integration-snowflake
-integration-snowflake: .env ## Runs snowflake integration tests with py38.
-	$(DOCKER_CMD) tox -e py38-snowflake -- -nauto
-
-.PHONY: integration-snowflake-fail-fast
-integration-snowflake-fail-fast: .env ## Runs snowflake integration tests with py38 in "fail fast" mode.
-	$(DOCKER_CMD) tox -e py38-snowflake -- -x -nauto
-
 .PHONY: integration-bigquery
 integration-bigquery: .env ## Runs bigquery integration tests with py38.
 	$(DOCKER_CMD) tox -e py38-bigquery -- -nauto
```
```
@@ -238,12 +238,6 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):
@classmethod
def _rollback(cls, connection: Connection) -> None:
"""Roll back the given connection."""
if flags.STRICT_MODE:
if not isinstance(connection, Connection):
raise dbt.exceptions.CompilerException(
f'In _rollback, got {connection} - not a Connection!'
)

if connection.transaction_open is False:
raise dbt.exceptions.InternalException(
f'Tried to rollback transaction on connection '
@@ -257,12 +251,6 @@ class BaseConnectionManager(metaclass=abc.ABCMeta):

@classmethod
def close(cls, connection: Connection) -> Connection:
if flags.STRICT_MODE:
if not isinstance(connection, Connection):
raise dbt.exceptions.CompilerException(
f'In close, got {connection} - not a Connection!'
)

# if the connection is in closed or init, there's nothing to do
if connection.state in {ConnectionState.CLOSED, ConnectionState.INIT}:
return connection
@@ -16,9 +16,7 @@ from dbt.exceptions (
get_relation_returned_multiple_results,
InternalException, NotImplementedException, RuntimeException,
)
from dbt import flags

from dbt import deprecations
from dbt.adapters.protocol import (
AdapterConfig,
ConnectionManagerProtocol,
@@ -289,9 +287,7 @@ class BaseAdapter(metaclass=AdapterMeta):
def _schema_is_cached(self, database: Optional[str], schema: str) -> bool:
"""Check if the schema is cached, and by default logs if it is not."""

if flags.USE_CACHE is False:
return False
elif (database, schema) not in self.cache:
if (database, schema) not in self.cache:
logger.debug(
'On "{}": cache miss for schema "{}.{}", this is inefficient'
.format(self.nice_connection_name(), database, schema)
@@ -324,7 +320,9 @@ class BaseAdapter(metaclass=AdapterMeta):
"""
info_schema_name_map = SchemaSearchMap()
nodes: Iterator[CompileResultNode] = chain(
manifest.nodes.values(),
[node for node in manifest.nodes.values() if (
node.is_relational and not node.is_ephemeral_model
)],
manifest.sources.values(),
)
for node in nodes:
@@ -340,9 +338,6 @@ class BaseAdapter(metaclass=AdapterMeta):
"""Populate the relations cache for the given schemas. Returns an
iterable of the schemas populated, as strings.
"""
if not flags.USE_CACHE:
return

cache_schemas = self._get_cache_schemas(manifest)
with executor(self.config) as tpe:
futures: List[Future[List[BaseRelation]]] = []
@@ -375,9 +370,6 @@ class BaseAdapter(metaclass=AdapterMeta):
"""Run a query that gets a populated cache of the relations in the
database and set the cache on this adapter.
"""
if not flags.USE_CACHE:
return

with self.cache.lock:
if clear:
self.cache.clear()
@@ -391,8 +383,7 @@ class BaseAdapter(metaclass=AdapterMeta):
raise_compiler_error(
'Attempted to cache a null relation for {}'.format(name)
)
if flags.USE_CACHE:
self.cache.add(relation)
self.cache.add(relation)
# so jinja doesn't render things
return ''

@@ -406,8 +397,7 @@ class BaseAdapter(metaclass=AdapterMeta):
raise_compiler_error(
'Attempted to drop a null relation for {}'.format(name)
)
if flags.USE_CACHE:
self.cache.drop(relation)
self.cache.drop(relation)
return ''

@available
@@ -428,8 +418,7 @@ class BaseAdapter(metaclass=AdapterMeta):
.format(src_name, dst_name, name)
)

if flags.USE_CACHE:
self.cache.rename(from_relation, to_relation)
self.cache.rename(from_relation, to_relation)
return ''

###
@@ -807,12 +796,11 @@ class BaseAdapter(metaclass=AdapterMeta):
def quote_seed_column(
self, column: str, quote_config: Optional[bool]
) -> str:
# this is the default for now
quote_columns: bool = False
quote_columns: bool = True
if isinstance(quote_config, bool):
quote_columns = quote_config
elif quote_config is None:
deprecations.warn('column-quoting-unset')
pass
else:
raise_compiler_error(
f'The seed configuration value of "quote_columns" has an '
@@ -944,7 +932,6 @@ class BaseAdapter(metaclass=AdapterMeta):
project: Optional[str] = None,
context_override: Optional[Dict[str, Any]] = None,
kwargs: Dict[str, Any] = None,
release: bool = False,
text_only_columns: Optional[Iterable[str]] = None,
) -> agate.Table:
"""Look macro_name up in the manifest and execute its results.
@@ -958,10 +945,8 @@ class BaseAdapter(metaclass=AdapterMeta):
execution context.
:param kwargs: An optional dict of keyword args used to pass to the
macro.
:param release: Ignored.
"""
if release is not False:
deprecations.warn('execute-macro-release')

if kwargs is None:
kwargs = {}
if context_override is None:
```
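The `_get_cache_schemas` hunk above narrows cache/catalog building to relational, non-ephemeral nodes. A minimal standalone sketch of that filter, using an illustrative `Node` dataclass rather than dbt's real manifest classes:

```python
from dataclasses import dataclass
from itertools import chain
from typing import Dict, List


@dataclass
class Node:
    # Illustrative stand-in for a manifest node; dbt's real nodes carry many more fields.
    unique_id: str
    is_relational: bool
    is_ephemeral_model: bool


def cacheable_nodes(nodes: Dict[str, Node], sources: List[Node]) -> List[Node]:
    # Mirror of the comprehension in the hunk: keep only relational,
    # non-ephemeral nodes, then append all sources unchanged.
    filtered = [
        node for node in nodes.values()
        if node.is_relational and not node.is_ephemeral_model
    ]
    return list(chain(filtered, sources))


if __name__ == "__main__":
    nodes = {
        "model.proj.a": Node("model.proj.a", True, False),   # kept
        "model.proj.b": Node("model.proj.b", True, True),    # ephemeral: skipped
        "test.proj.c": Node("test.proj.c", False, False),    # not relational: skipped
    }
    print([n.unique_id for n in cacheable_nodes(nodes, [])])  # ['model.proj.a']
```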
```
@@ -11,7 +11,6 @@ from dbt.contracts.connection (
Connection, ConnectionState, AdapterResponse
)
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import flags


class SQLConnectionManager(BaseConnectionManager):
@@ -144,13 +143,6 @@ class SQLConnectionManager(BaseConnectionManager):

def begin(self):
connection = self.get_thread_connection()

if flags.STRICT_MODE:
if not isinstance(connection, Connection):
raise dbt.exceptions.CompilerException(
f'In begin, got {connection} - not a Connection!'
)

if connection.transaction_open is True:
raise dbt.exceptions.InternalException(
'Tried to begin a new transaction on connection "{}", but '
@@ -163,12 +155,6 @@ class SQLConnectionManager(BaseConnectionManager):

def commit(self):
connection = self.get_thread_connection()
if flags.STRICT_MODE:
if not isinstance(connection, Connection):
raise dbt.exceptions.CompilerException(
f'In commit, got {connection} - not a Connection!'
)

if connection.transaction_open is False:
raise dbt.exceptions.InternalException(
'Tried to commit transaction on connection "{}", but '
```
```
@@ -30,7 +30,7 @@ def find_matching(
root_path: str,
relative_paths_to_search: List[str],
file_pattern: str,
) -> List[Dict[str, str]]:
) -> List[Dict[str, Any]]:
"""
Given an absolute `root_path`, a list of relative paths to that
absolute root path (`relative_paths_to_search`), and a `file_pattern`
@@ -61,11 +61,19 @@ def find_matching(
relative_path = os.path.relpath(
absolute_path, absolute_path_to_search
)
modification_time = 0.0
try:
modification_time = os.path.getmtime(absolute_path)
except OSError:
logger.exception(
f"Error retrieving modification time for file {absolute_path}"
)
if reobj.match(local_file):
matching.append({
'searched_path': relative_path_to_search,
'absolute_path': absolute_path,
'relative_path': relative_path,
'modification_time': modification_time,
})

return matching
```
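The `find_matching` hunk above records each matched file's modification time, defaulting to 0.0 when the stat fails. A small self-contained sketch of that pattern (the helper name and example paths are illustrative, not dbt's actual call sites):

```python
import logging
import os

logger = logging.getLogger(__name__)


def safe_mtime(absolute_path: str) -> float:
    # Default to 0.0 so a missing or unreadable file never aborts the scan;
    # log the failure instead, matching the OSError handling in the hunk above.
    modification_time = 0.0
    try:
        modification_time = os.path.getmtime(absolute_path)
    except OSError:
        logger.exception("Error retrieving modification time for file %s", absolute_path)
    return modification_time


if __name__ == "__main__":
    logging.basicConfig()
    print(safe_mtime(__file__))         # a real timestamp
    print(safe_mtime("/no/such/file"))  # 0.0, with a logged traceback
```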
```
@@ -10,7 +10,7 @@ from dbt.adapters.factory import get_adapter
from dbt.clients import jinja
from dbt.clients.system import make_directory
from dbt.context.providers import generate_runtime_model
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.manifest import Manifest, UniqueID
from dbt.contracts.graph.compiled import (
COMPILED_TYPES,
CompiledSchemaTestNode,
@@ -107,6 +107,18 @@ def _extend_prepended_ctes(prepended_ctes, new_prepended_ctes):
_add_prepended_cte(prepended_ctes, new_cte)


def _get_tests_for_node(manifest: Manifest, unique_id: UniqueID) -> List[UniqueID]:
""" Get a list of tests that depend on the node with the
provided unique id """

return [
node.unique_id
for _, node in manifest.nodes.items()
if node.resource_type == NodeType.Test and
unique_id in node.depends_on_nodes
]


class Linker:
def __init__(self, data=None):
if data is None:
@@ -142,7 +154,7 @@ class Linker:
include all nodes in their corresponding graph entries.
"""
out_graph = self.graph.copy()
for node_id in self.graph.nodes():
for node_id in self.graph:
data = manifest.expect(node_id).to_dict(omit_none=True)
out_graph.add_node(node_id, **data)
nx.write_gpickle(out_graph, outfile)
@@ -412,13 +424,80 @@ class Compiler:
self.link_node(linker, node, manifest)
for exposure in manifest.exposures.values():
self.link_node(linker, exposure, manifest)
# linker.add_node(exposure.unique_id)

cycle = linker.find_cycles()

if cycle:
raise RuntimeError("Found a cycle: {}".format(cycle))

self.resolve_graph(linker, manifest)

def resolve_graph(self, linker: Linker, manifest: Manifest) -> None:
""" This method adds additional edges to the DAG. For a given non-test
executable node, add an edge from an upstream test to the given node if
the set of nodes the test depends on is a proper/strict subset of the
upstream nodes for the given node. """

# Given a graph:
# model1 --> model2 --> model3
# |           |
# |           \/
# \/          test2
# test1
#
# Produce the following graph:
# model1 --> model2 --> model3
# |           |          /\ /\
# |           \/         |   |
# \/          test2 -----    |
# test1 ---------------------

for node_id in linker.graph:
# If node is executable (in manifest.nodes) and does _not_
# represent a test, continue.
if (
node_id in manifest.nodes and
manifest.nodes[node_id].resource_type != NodeType.Test
):
# Get *everything* upstream of the node
all_upstream_nodes = nx.traversal.bfs_tree(
linker.graph, node_id, reverse=True
)
# Get the set of upstream nodes not including the current node.
upstream_nodes = set([
n for n in all_upstream_nodes if n != node_id
])

# Get all tests that depend on any upstream nodes.
upstream_tests = []
for upstream_node in upstream_nodes:
upstream_tests += _get_tests_for_node(
manifest,
upstream_node
)

for upstream_test in upstream_tests:
# Get the set of all nodes that the test depends on
# including the upstream_node itself. This is necessary
# because tests can depend on multiple nodes (ex:
# relationship tests). Test nodes do not distinguish
# between what node the test is "testing" and what
# node(s) it depends on.
test_depends_on = set(
manifest.nodes[upstream_test].depends_on_nodes
)

# If the set of nodes that an upstream test depends on
# is a proper (or strict) subset of all upstream nodes of
# the current node, add an edge from the upstream test
# to the current node. Must be a proper/strict subset to
# avoid adding a circular dependency to the graph.
if (test_depends_on < upstream_nodes):
linker.graph.add_edge(
upstream_test,
node_id
)

def compile(self, manifest: Manifest, write=True) -> Graph:
self.initialize()
linker = Linker()
```
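The `resolve_graph` hunk above is easier to follow with a concrete graph in hand. Here is a minimal, self-contained sketch of the same proper-subset rule using networkx directly; the node names and the tiny example graph are made up for illustration, while dbt's real implementation operates on the manifest and `Linker` shown above:

```python
import networkx as nx

# model1 -> model2 -> model3, with test1 attached to model1 and test2 to model2.
graph = nx.DiGraph()
graph.add_edges_from([
    ("model1", "model2"),
    ("model2", "model3"),
    ("model1", "test1"),
    ("model2", "test2"),
])
tests = {"test1": {"model1"}, "test2": {"model2"}}  # test -> nodes it depends on


def add_test_edges(graph: nx.DiGraph, tests: dict) -> None:
    # For every non-test node, collect everything upstream of it; any test whose
    # dependency set is a *proper* subset of that upstream set gets an edge into
    # the node, so the node waits on the test without creating a cycle.
    for node_id in list(graph):
        if node_id in tests:
            continue
        upstream = set(nx.bfs_tree(graph, node_id, reverse=True)) - {node_id}
        for test_id, test_deps in tests.items():
            if test_deps < upstream:  # proper/strict subset check
                graph.add_edge(test_id, node_id)


add_test_edges(graph, tests)
print(sorted(graph.successors("test1")))  # ['model3']
print(sorted(graph.successors("test2")))  # ['model3']
```

As in the ASCII diagram in the diff, only model3 gains edges from the tests: for model2 the dependency set of test1 equals its upstream set, so the proper-subset check keeps that edge out.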
```
@@ -1,4 +1,4 @@
# all these are just exports, they need "noqa" so flake8 will not complain.
from .profile import Profile, PROFILES_DIR, read_user_config # noqa
from .profile import Profile, read_user_config # noqa
from .project import Project, IsFQNResource # noqa
from .runtime import RuntimeConfig, UnsetProfileConfig # noqa
@@ -4,6 +4,7 @@ import os

from dbt.dataclass_schema import ValidationError

from dbt import flags
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.connection import Credentials, HasCredentials
@@ -20,10 +21,8 @@ from dbt.utils import coerce_dict_str
from .renderer import ProfileRenderer

DEFAULT_THREADS = 1

DEFAULT_PROFILES_DIR = os.path.join(os.path.expanduser('~'), '.dbt')
PROFILES_DIR = os.path.expanduser(
os.getenv('DBT_PROFILES_DIR', DEFAULT_PROFILES_DIR)
)

INVALID_PROFILE_MESSAGE = """
dbt encountered an error while trying to read your profiles.yml file.
@@ -43,7 +42,7 @@ Here, [profile name] should be replaced with a profile name
defined in your profiles.yml file. You can find profiles.yml here:

{profiles_file}/profiles.yml
""".format(profiles_file=PROFILES_DIR)
""".format(profiles_file=DEFAULT_PROFILES_DIR)


def read_profile(profiles_dir: str) -> Dict[str, Any]:
@@ -73,10 +72,10 @@ def read_user_config(directory: str) -> UserConfig:
try:
profile = read_profile(directory)
if profile:
user_cfg = coerce_dict_str(profile.get('config', {}))
if user_cfg is not None:
UserConfig.validate(user_cfg)
return UserConfig.from_dict(user_cfg)
user_config = coerce_dict_str(profile.get('config', {}))
if user_config is not None:
UserConfig.validate(user_config)
return UserConfig.from_dict(user_config)
except (RuntimeException, ValidationError):
pass
return UserConfig()
@@ -89,7 +88,7 @@ def read_user_config(directory: str) -> UserConfig:
class Profile(HasCredentials):
profile_name: str
target_name: str
config: UserConfig
user_config: UserConfig
threads: int
credentials: Credentials

@@ -97,7 +96,7 @@ class Profile(HasCredentials):
self,
profile_name: str,
target_name: str,
config: UserConfig,
user_config: UserConfig,
threads: int,
credentials: Credentials
):
@@ -106,7 +105,7 @@ class Profile(HasCredentials):
"""
self.profile_name = profile_name
self.target_name = target_name
self.config = config
self.user_config = user_config
self.threads = threads
self.credentials = credentials

@@ -124,12 +123,12 @@ class Profile(HasCredentials):
result = {
'profile_name': self.profile_name,
'target_name': self.target_name,
'config': self.config,
'user_config': self.user_config,
'threads': self.threads,
'credentials': self.credentials,
}
if serialize_credentials:
result['config'] = self.config.to_dict(omit_none=True)
result['user_config'] = self.user_config.to_dict(omit_none=True)
result['credentials'] = self.credentials.to_dict(omit_none=True)
return result

@@ -143,7 +142,7 @@ class Profile(HasCredentials):
'name': self.target_name,
'target_name': self.target_name,
'profile_name': self.profile_name,
'config': self.config.to_dict(omit_none=True),
'config': self.user_config.to_dict(omit_none=True),
})
return target

@@ -238,7 +237,7 @@ class Profile(HasCredentials):
threads: int,
profile_name: str,
target_name: str,
user_cfg: Optional[Dict[str, Any]] = None
user_config: Optional[Dict[str, Any]] = None
) -> 'Profile':
"""Create a profile from an existing set of Credentials and the
remaining information.
@@ -247,20 +246,20 @@ class Profile(HasCredentials):
:param threads: The number of threads to use for connections.
:param profile_name: The profile name used for this profile.
:param target_name: The target name used for this profile.
:param user_cfg: The user-level config block from the
:param user_config: The user-level config block from the
raw profiles, if specified.
:raises DbtProfileError: If the profile is invalid.
:returns: The new Profile object.
"""
if user_cfg is None:
user_cfg = {}
UserConfig.validate(user_cfg)
config = UserConfig.from_dict(user_cfg)
if user_config is None:
user_config = {}
UserConfig.validate(user_config)
user_config_obj: UserConfig = UserConfig.from_dict(user_config)

profile = cls(
profile_name=profile_name,
target_name=target_name,
config=config,
user_config=user_config_obj,
threads=threads,
credentials=credentials
)
@@ -313,7 +312,7 @@ class Profile(HasCredentials):
raw_profile: Dict[str, Any],
profile_name: str,
renderer: ProfileRenderer,
user_cfg: Optional[Dict[str, Any]] = None,
user_config: Optional[Dict[str, Any]] = None,
target_override: Optional[str] = None,
threads_override: Optional[int] = None,
) -> 'Profile':
@@ -325,7 +324,7 @@ class Profile(HasCredentials):
disk as yaml and its values rendered with jinja.
:param profile_name: The profile name used.
:param renderer: The config renderer.
:param user_cfg: The global config for the user, if it
:param user_config: The global config for the user, if it
was present.
:param target_override: The target to use, if provided on
the command line.
@@ -335,9 +334,9 @@ class Profile(HasCredentials):
target could not be found
:returns: The new Profile object.
"""
# user_cfg is not rendered.
if user_cfg is None:
user_cfg = raw_profile.get('config')
# user_config is not rendered.
if user_config is None:
user_config = raw_profile.get('config')
# TODO: should it be, and the values coerced to bool?
target_name, profile_data = cls.render_profile(
raw_profile, profile_name, target_override, renderer
@@ -358,7 +357,7 @@ class Profile(HasCredentials):
profile_name=profile_name,
target_name=target_name,
threads=threads,
user_cfg=user_cfg
user_config=user_config
)

@classmethod
@@ -401,13 +400,13 @@ class Profile(HasCredentials):
error_string=msg
)
)
user_cfg = raw_profiles.get('config')
user_config = raw_profiles.get('config')

return cls.from_raw_profile_info(
raw_profile=raw_profile,
profile_name=profile_name,
renderer=renderer,
user_cfg=user_cfg,
user_config=user_config,
target_override=target_override,
threads_override=threads_override,
)
@@ -435,7 +434,7 @@ class Profile(HasCredentials):
"""
threads_override = getattr(args, 'threads', None)
target_override = getattr(args, 'target', None)
raw_profiles = read_profile(args.profiles_dir)
raw_profiles = read_profile(flags.PROFILES_DIR)
profile_name = cls.pick_profile_name(getattr(args, 'profile', None),
project_profile_name)
return cls.from_raw_profiles(
```
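The profile hunks above move the profiles-directory lookup behind `flags.PROFILES_DIR`, which still resolves the `DBT_PROFILES_DIR` environment variable with `~/.dbt` as the fallback. A tiny sketch of that resolution logic on its own (the environment variable name and default come from the diff; the function wrapper is illustrative):

```python
import os


def resolve_profiles_dir() -> str:
    # Same shape as the constants in the hunk: the env var wins, otherwise ~/.dbt.
    default_profiles_dir = os.path.join(os.path.expanduser("~"), ".dbt")
    return os.path.expanduser(os.getenv("DBT_PROFILES_DIR", default_profiles_dir))


if __name__ == "__main__":
    os.environ.pop("DBT_PROFILES_DIR", None)
    print(resolve_profiles_dir())                         # e.g. /home/user/.dbt
    os.environ["DBT_PROFILES_DIR"] = "~/custom-profiles"
    print(resolve_profiles_dir())                         # expanded custom path
```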
```
@@ -645,13 +645,24 @@ class Project:
def hashed_name(self):
return hashlib.md5(self.project_name.encode('utf-8')).hexdigest()

def get_selector(self, name: str) -> SelectionSpec:
def get_selector(self, name: str) -> Union[SelectionSpec, bool]:
if name not in self.selectors:
raise RuntimeException(
f'Could not find selector named {name}, expected one of '
f'{list(self.selectors)}'
)
return self.selectors[name]
return self.selectors[name]["definition"]

def get_default_selector_name(self) -> Union[str, None]:
"""This function fetch the default selector to use on `dbt run` (if any)
:return: either a selector if default is set or None
:rtype: Union[SelectionSpec, None]
"""
for selector_name, selector in self.selectors.items():
if selector["default"] is True:
return selector_name

return None

def get_macro_search_order(self, macro_namespace: str):
for dispatch_entry in self.dispatch:
```
```
@@ -12,6 +12,7 @@ from .profile import Profile
from .project import Project
from .renderer import DbtProjectYamlRenderer, ProfileRenderer
from .utils import parse_cli_vars
from dbt import flags
from dbt import tracking
from dbt.adapters.factory import get_relation_class_by_name, get_include_paths
from dbt.helper_types import FQNPath, PathSet
@@ -117,7 +118,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
unrendered=project.unrendered,
profile_name=profile.profile_name,
target_name=profile.target_name,
config=profile.config,
user_config=profile.user_config,
threads=profile.threads,
credentials=profile.credentials,
args=args,
@@ -144,7 +145,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
project = Project.from_project_root(
project_root,
renderer,
verify_version=getattr(self.args, 'version_check', False),
verify_version=bool(flags.VERSION_CHECK),
)

cfg = self.from_parts(
@@ -197,7 +198,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
) -> Tuple[Project, Profile]:
# profile_name from the project
project_root = args.project_dir if args.project_dir else os.getcwd()
version_check = getattr(args, 'version_check', False)
version_check = bool(flags.VERSION_CHECK)
partial = Project.partial_load(
project_root,
verify_version=version_check
@@ -416,7 +417,7 @@ class UnsetConfig(UserConfig):
class UnsetProfile(Profile):
def __init__(self):
self.credentials = UnsetCredentials()
self.config = UnsetConfig()
self.user_config = UnsetConfig()
self.profile_name = ''
self.target_name = ''
self.threads = -1
@@ -513,7 +514,7 @@ class UnsetProfileConfig(RuntimeConfig):
unrendered=project.unrendered,
profile_name='',
target_name='',
config=UnsetConfig(),
user_config=UnsetConfig(),
threads=getattr(args, 'threads', 1),
credentials=UnsetCredentials(),
args=args,
```
```
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Dict, Any
from typing import Dict, Any, Union
from dbt.clients.yaml_helper import ( # noqa: F401
yaml, Loader, Dumper, load_yaml_text
)
@@ -29,13 +29,14 @@ Validator Error:
"""


class SelectorConfig(Dict[str, SelectionSpec]):
class SelectorConfig(Dict[str, Dict[str, Union[SelectionSpec, bool]]]):

@classmethod
def selectors_from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
try:
SelectorFile.validate(data)
selector_file = SelectorFile.from_dict(data)
validate_selector_default(selector_file)
selectors = parse_from_selectors_definition(selector_file)
except ValidationError as exc:
yaml_sel_cfg = yaml.dump(exc.instance)
@@ -118,6 +119,24 @@ def selector_config_from_data(
return selectors


def validate_selector_default(selector_file: SelectorFile) -> None:
"""Check if a selector.yml file has more than 1 default key set to true"""
default_set: bool = False
default_selector_name: Union[str, None] = None

for selector in selector_file.selectors:
if selector.default is True and default_set is False:
default_set = True
default_selector_name = selector.name
continue
if selector.default is True and default_set is True:
raise DbtSelectorsError(
"Error when parsing the selector file. "
"Found multiple selectors with `default: true`:"
f"{default_selector_name} and {selector.name}"
)


# These are utilities to clean up the dictionary created from
# selectors.yml by turning the cli-string format entries into
# normalized dictionary entries. It parallels the flow in
```
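The `validate_selector_default` hunk above rejects a `selectors.yml` that marks more than one selector as the default. A stand-alone sketch of the same check against raw YAML, without dbt's `SelectorFile` class (the YAML snippet and the error type used here are illustrative):

```python
import yaml

SELECTORS_YML = """
selectors:
  - name: nightly
    default: true
    definition: "tag:nightly"
  - name: hourly
    definition: "tag:hourly"
"""


def validate_single_default(data: dict) -> None:
    # Collect every selector flagged default: true; more than one is an error,
    # mirroring the DbtSelectorsError raised in the hunk above.
    defaults = [s["name"] for s in data["selectors"] if s.get("default") is True]
    if len(defaults) > 1:
        raise ValueError(
            f"Found multiple selectors with `default: true`: {', '.join(defaults)}"
        )


if __name__ == "__main__":
    validate_single_default(yaml.safe_load(SELECTORS_YML))  # passes: one default
```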
```
@@ -526,8 +526,6 @@ class BaseContext(metaclass=ContextMeta):

The list of valid flags are:

- `flags.STRICT_MODE`: True if `--strict` (or `-S`) was provided on the
command line
- `flags.FULL_REFRESH`: True if `--full-refresh` was provided on the
command line
- `flags.NON_DESTRUCTIVE`: True if `--non-destructive` was provided on
@@ -186,14 +186,11 @@ class UserConfigContract(Protocol):
partial_parse: Optional[bool] = None
printer_width: Optional[int] = None

def set_values(self, cookie_dir: str) -> None:
...


class HasCredentials(Protocol):
credentials: Credentials
profile_name: str
config: UserConfigContract
user_config: UserConfigContract
target_name: str
threads: int
```
```
@@ -42,6 +42,7 @@ parse_file_type_to_parser = {
class FilePath(dbtClassMixin):
searched_path: str
relative_path: str
modification_time: float
project_root: str

@property
@@ -132,6 +133,10 @@ class RemoteFile(dbtClassMixin):
def original_file_path(self):
return 'from remote system'

@property
def modification_time(self):
return 'from remote system'


@dataclass
class BaseSourceFile(dbtClassMixin, SerializableType):
@@ -150,8 +155,6 @@ class BaseSourceFile(dbtClassMixin, SerializableType):
def file_id(self):
if isinstance(self.path, RemoteFile):
return None
if self.checksum.name == 'none':
return None
return f'{self.project_name}://{self.path.original_file_path}'

def _serialize(self):
```
```
@@ -223,9 +223,7 @@ class ManifestMetadata(BaseArtifactMetadata):
self.user_id = tracking.active_user.id

if self.send_anonymous_usage_stats is None:
self.send_anonymous_usage_stats = (
not tracking.active_user.do_not_track
)
self.send_anonymous_usage_stats = flags.SEND_ANONYMOUS_USAGE_STATS

@classmethod
def default(cls):
@@ -1071,7 +1069,7 @@ AnyManifest = Union[Manifest, MacroManifest]


@dataclass
@schema_version('manifest', 2)
@schema_version('manifest', 3)
class WritableManifest(ArtifactMixin):
nodes: Mapping[UniqueID, ManifestNode] = field(
metadata=dict(description=(
```
```
@@ -156,13 +156,6 @@ class ParsedNodeMixins(dbtClassMixin):
self.columns = patch.columns
self.meta = patch.meta
self.docs = patch.docs
if flags.STRICT_MODE:
# It seems odd that an instance can be invalid
# Maybe there should be validation or restrictions
# elsewhere?
assert isinstance(self, dbtClassMixin)
dct = self.to_dict(omit_none=False)
self.validate(dct)

def get_materialization(self):
return self.config.materialized
@@ -509,11 +502,6 @@ class ParsedMacro(UnparsedBaseNode, HasUniqueID):
self.meta = patch.meta
self.docs = patch.docs
self.arguments = patch.arguments
if flags.STRICT_MODE:
# What does this actually validate?
assert isinstance(self, dbtClassMixin)
dct = self.to_dict(omit_none=False)
self.validate(dct)

def same_contents(self, other: Optional['ParsedMacro']) -> bool:
if other is None:
```
```
@@ -1,9 +1,7 @@
from dbt.contracts.util import Replaceable, Mergeable, list_str
from dbt.contracts.connection import UserConfigContract, QueryComment
from dbt.contracts.connection import QueryComment, UserConfigContract
from dbt.helper_types import NoValue
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt import tracking
from dbt import ui
from dbt.dataclass_schema import (
dbtClassMixin, ValidationError,
HyphenatedDbtClassMixin,
@@ -230,25 +228,20 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
use_colors: Optional[bool] = None
partial_parse: Optional[bool] = None
printer_width: Optional[int] = None

def set_values(self, cookie_dir):
if self.send_anonymous_usage_stats:
tracking.initialize_tracking(cookie_dir)
else:
tracking.do_not_track()

if self.use_colors is not None:
ui.use_colors(self.use_colors)

if self.printer_width:
ui.printer_width(self.printer_width)
write_json: Optional[bool] = None
warn_error: Optional[bool] = None
log_format: Optional[bool] = None
debug: Optional[bool] = None
version_check: Optional[bool] = None
fail_fast: Optional[bool] = None
use_experimental_parser: Optional[bool] = None


@dataclass
class ProfileConfig(HyphenatedDbtClassMixin, Replaceable):
profile_name: str = field(metadata={'preserve_underscore': True})
target_name: str = field(metadata={'preserve_underscore': True})
config: UserConfig
user_config: UserConfig = field(metadata={'preserve_underscore': True})
threads: int
# TODO: make this a dynamic union of some kind?
credentials: Optional[Dict[str, Any]]
```
```
@@ -185,7 +185,7 @@ class RunExecutionResult(


@dataclass
@schema_version('run-results', 2)
@schema_version('run-results', 3)
class RunResultsArtifact(ExecutionResult, ArtifactMixin):
results: Sequence[RunResultOutput]
args: Dict[str, Any] = field(default_factory=dict)
@@ -285,6 +285,9 @@ class SourceFreshnessOutput(dbtClassMixin):
status: FreshnessStatus
criteria: FreshnessThreshold
adapter_response: Dict[str, Any]
timing: List[TimingInfo]
thread_id: str
execution_time: float


@dataclass
@@ -333,7 +336,10 @@ def process_freshness_result(
max_loaded_at_time_ago_in_s=result.age,
status=result.status,
criteria=criteria,
adapter_response=result.adapter_response
adapter_response=result.adapter_response,
timing=result.timing,
thread_id=result.thread_id,
execution_time=result.execution_time,
)


@@ -363,7 +369,7 @@ class FreshnessResult(ExecutionResult):


@dataclass
@schema_version('sources', 1)
@schema_version('sources', 2)
class FreshnessExecutionResultArtifact(
ArtifactMixin,
VersionedSchema,
```
```
@@ -121,9 +121,9 @@ class RPCDocsGenerateParameters(RPCParameters):

@dataclass
class RPCBuildParameters(RPCParameters):
threads: Optional[int] = None
models: Union[None, str, List[str]] = None
resource_types: Optional[List[str]] = None
select: Union[None, str, List[str]] = None
threads: Optional[int] = None
exclude: Union[None, str, List[str]] = None
selector: Optional[str] = None
state: Optional[str] = None
@@ -9,6 +9,7 @@ class SelectorDefinition(dbtClassMixin):
name: str
definition: Union[str, Dict[str, Any]]
description: str = ''
default: bool = False


@dataclass
```
```
@@ -57,22 +57,6 @@ class DispatchPackagesDeprecation(DBTDeprecation):
'''


class MaterializationReturnDeprecation(DBTDeprecation):
_name = 'materialization-return'

_description = '''\
The materialization ("{materialization}") did not explicitly return a list
of relations to add to the cache. By default the target relation will be
added, but this behavior will be removed in a future version of dbt.

For more information, see:

https://docs.getdbt.com/v0.15/docs/creating-new-materializations#section-6-returning-relations
'''


class NotADictionaryDeprecation(DBTDeprecation):
_name = 'not-a-dictionary'

@@ -82,21 +66,6 @@ class NotADictionaryDeprecation(DBTDeprecation):
'''


class ColumnQuotingDeprecation(DBTDeprecation):
_name = 'column-quoting-unset'

_description = '''\
The quote_columns parameter was not set for seeds, so the default value of
False was chosen. The default will change to True in a future release.

For more information, see:

https://docs.getdbt.com/v0.15/docs/seeds#section-specify-column-quoting
'''


class ModelsKeyNonModelDeprecation(DBTDeprecation):
_name = 'models-key-mismatch'

@@ -113,15 +82,6 @@ class ModelsKeyNonModelDeprecation(DBTDeprecation):
'''


class ExecuteMacrosReleaseDeprecation(DBTDeprecation):
_name = 'execute-macro-release'
_description = '''\
The "release" argument to execute_macro is now ignored, and will be removed
in a future relase of dbt. At that time, providing a `release` argument
will result in an error.
'''


class AdapterMacroDeprecation(DBTDeprecation):
_name = 'adapter-macro'
_description = '''\
@@ -178,11 +138,8 @@ active_deprecations: Set[str] = set()

deprecations_list: List[DBTDeprecation] = [
DispatchPackagesDeprecation(),
MaterializationReturnDeprecation(),
NotADictionaryDeprecation(),
ColumnQuotingDeprecation(),
ModelsKeyNonModelDeprecation(),
ExecuteMacrosReleaseDeprecation(),
AdapterMacroDeprecation(),
PackageRedirectDeprecation()
]
```
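The hunks above remove several entries from `deprecations_list`; each entry is a warn-once deprecation keyed by `_name` and tracked in the module-level `active_deprecations` set. A generic sketch of that warn-once pattern (the class, registry, and messages here are illustrative, not dbt's actual API):

```python
import warnings
from typing import Dict, Set

active_deprecations: Set[str] = set()   # names that have already warned once


class Deprecation:
    def __init__(self, name: str, description: str) -> None:
        self.name = name
        self.description = description


REGISTRY: Dict[str, Deprecation] = {
    d.name: d
    for d in [
        Deprecation("column-quoting-unset", "quote_columns was not set for seeds."),
        Deprecation("adapter-macro", "adapter_macro has been replaced."),
    ]
}


def warn(name: str, **kwargs: str) -> None:
    # Look the deprecation up by name and emit its message only the first time.
    if name not in REGISTRY:
        raise KeyError(f"Unknown deprecation: {name}")
    if name in active_deprecations:
        return
    active_deprecations.add(name)
    warnings.warn(REGISTRY[name].description.format(**kwargs), DeprecationWarning)


if __name__ == "__main__":
    warn("column-quoting-unset")  # warns
    warn("column-quoting-unset")  # silent: already in active_deprecations
```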
@@ -6,18 +6,49 @@ if os.name != 'nt':
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# initially all flags are set to None, the on-load call of reset() will set
|
||||
# them for their first time.
|
||||
STRICT_MODE = None
|
||||
FULL_REFRESH = None
|
||||
USE_CACHE = None
|
||||
WARN_ERROR = None
|
||||
TEST_NEW_PARSER = None
|
||||
# PROFILES_DIR must be set before the other flags
|
||||
# It also gets set in main.py and in set_from_args because the rpc server
|
||||
# doesn't go through exactly the same main arg processing.
|
||||
DEFAULT_PROFILES_DIR = os.path.join(os.path.expanduser('~'), '.dbt')
|
||||
PROFILES_DIR = os.path.expanduser(
|
||||
os.getenv('DBT_PROFILES_DIR', DEFAULT_PROFILES_DIR)
|
||||
)
|
||||
|
||||
STRICT_MODE = False # Only here for backwards compatibility
|
||||
FULL_REFRESH = False # subcommand
|
||||
STORE_FAILURES = False # subcommand
|
||||
GREEDY = None # subcommand
|
||||
|
||||
# Global CLI commands
|
||||
USE_EXPERIMENTAL_PARSER = None
|
||||
WARN_ERROR = None
|
||||
WRITE_JSON = None
|
||||
PARTIAL_PARSE = None
|
||||
USE_COLORS = None
|
||||
STORE_FAILURES = None
|
||||
DEBUG = None
|
||||
LOG_FORMAT = None
|
||||
VERSION_CHECK = None
|
||||
FAIL_FAST = None
|
||||
SEND_ANONYMOUS_USAGE_STATS = None
|
||||
PRINTER_WIDTH = 80
|
||||
|
||||
# Global CLI defaults. These flags are set from three places:
|
||||
# CLI args, environment variables, and user_config (profiles.yml).
|
||||
# Environment variables use the pattern 'DBT_{flag name}', like DBT_PROFILES_DIR
|
||||
flag_defaults = {
|
||||
"USE_EXPERIMENTAL_PARSER": False,
|
||||
"WARN_ERROR": False,
|
||||
"WRITE_JSON": True,
|
||||
"PARTIAL_PARSE": False,
|
||||
"USE_COLORS": True,
|
||||
"PROFILES_DIR": DEFAULT_PROFILES_DIR,
|
||||
"DEBUG": False,
|
||||
"LOG_FORMAT": None,
|
||||
"VERSION_CHECK": True,
|
||||
"FAIL_FAST": False,
|
||||
"SEND_ANONYMOUS_USAGE_STATS": True,
|
||||
"PRINTER_WIDTH": 80
|
||||
}
def env_set_truthy(key: str) -> Optional[str]:
|
||||
@@ -30,6 +61,12 @@ def env_set_truthy(key: str) -> Optional[str]:
|
||||
return value
def env_set_bool(env_value):
|
||||
if env_value in ('1', 't', 'true', 'y', 'yes'):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
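A quick illustration of how env_set_bool treats environment values; this is a minimal sketch added here for clarity, not part of the diff, and it assumes the function above is importable as dbt.flags.env_set_bool.

# Minimal sketch (assumes dbt.flags.env_set_bool as defined above).
# Values are lower-cased by the caller before this check, so only the
# lowercase truthy strings '1', 't', 'true', 'y', 'yes' match.
from dbt import flags

assert flags.env_set_bool('true') is True
assert flags.env_set_bool('1') is True
assert flags.env_set_bool('0') is False       # anything outside the truthy set is False
assert flags.env_set_bool('banana') is False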
def env_set_path(key: str) -> Optional[Path]:
|
||||
value = os.getenv(key)
|
||||
if value is None:
|
||||
@@ -50,56 +87,75 @@ def _get_context():
|
||||
return multiprocessing.get_context('spawn')
# This is not a flag, it's a place to store the lock
|
||||
MP_CONTEXT = _get_context()
def reset():
|
||||
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
|
||||
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
|
||||
STORE_FAILURES
|
||||
|
||||
STRICT_MODE = False
|
||||
FULL_REFRESH = False
|
||||
USE_CACHE = True
|
||||
WARN_ERROR = False
|
||||
TEST_NEW_PARSER = False
|
||||
USE_EXPERIMENTAL_PARSER = False
|
||||
WRITE_JSON = True
|
||||
PARTIAL_PARSE = False
|
||||
MP_CONTEXT = _get_context()
|
||||
USE_COLORS = True
|
||||
STORE_FAILURES = False
def set_from_args(args):
|
||||
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
|
||||
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
|
||||
STORE_FAILURES
|
||||
|
||||
USE_CACHE = getattr(args, 'use_cache', USE_CACHE)
|
||||
def set_from_args(args, user_config):
|
||||
global STRICT_MODE, FULL_REFRESH, WARN_ERROR, \
|
||||
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, USE_COLORS, \
|
||||
STORE_FAILURES, PROFILES_DIR, DEBUG, LOG_FORMAT, GREEDY, \
|
||||
VERSION_CHECK, FAIL_FAST, SEND_ANONYMOUS_USAGE_STATS, PRINTER_WIDTH
|
||||
|
||||
STRICT_MODE = False # backwards compatibility
|
||||
# cli args without user_config or env var option
|
||||
FULL_REFRESH = getattr(args, 'full_refresh', FULL_REFRESH)
|
||||
STRICT_MODE = getattr(args, 'strict', STRICT_MODE)
|
||||
WARN_ERROR = (
|
||||
STRICT_MODE or
|
||||
getattr(args, 'warn_error', STRICT_MODE or WARN_ERROR)
|
||||
)
|
||||
|
||||
TEST_NEW_PARSER = getattr(args, 'test_new_parser', TEST_NEW_PARSER)
|
||||
USE_EXPERIMENTAL_PARSER = getattr(args, 'use_experimental_parser', USE_EXPERIMENTAL_PARSER)
|
||||
WRITE_JSON = getattr(args, 'write_json', WRITE_JSON)
|
||||
PARTIAL_PARSE = getattr(args, 'partial_parse', None)
|
||||
MP_CONTEXT = _get_context()
|
||||
|
||||
# The use_colors attribute will always have a value because it is assigned
|
||||
# None by default from the add_mutually_exclusive_group function
|
||||
use_colors_override = getattr(args, 'use_colors')
|
||||
|
||||
if use_colors_override is not None:
|
||||
USE_COLORS = use_colors_override
|
||||
|
||||
STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
|
||||
GREEDY = getattr(args, 'greedy', GREEDY)
|
||||
|
||||
# global cli flags with env var and user_config alternatives
|
||||
USE_EXPERIMENTAL_PARSER = get_flag_value('USE_EXPERIMENTAL_PARSER', args, user_config)
|
||||
WARN_ERROR = get_flag_value('WARN_ERROR', args, user_config)
|
||||
WRITE_JSON = get_flag_value('WRITE_JSON', args, user_config)
|
||||
PARTIAL_PARSE = get_flag_value('PARTIAL_PARSE', args, user_config)
|
||||
USE_COLORS = get_flag_value('USE_COLORS', args, user_config)
|
||||
PROFILES_DIR = get_flag_value('PROFILES_DIR', args, user_config)
|
||||
DEBUG = get_flag_value('DEBUG', args, user_config)
|
||||
LOG_FORMAT = get_flag_value('LOG_FORMAT', args, user_config)
|
||||
VERSION_CHECK = get_flag_value('VERSION_CHECK', args, user_config)
|
||||
FAIL_FAST = get_flag_value('FAIL_FAST', args, user_config)
|
||||
SEND_ANONYMOUS_USAGE_STATS = get_flag_value('SEND_ANONYMOUS_USAGE_STATS', args, user_config)
|
||||
PRINTER_WIDTH = get_flag_value('PRINTER_WIDTH', args, user_config)
# initialize everything to the defaults on module load
|
||||
reset()
|
||||
def get_flag_value(flag, args, user_config):
|
||||
lc_flag = flag.lower()
|
||||
flag_value = getattr(args, lc_flag, None)
|
||||
if flag_value is None:
|
||||
# Environment variables use pattern 'DBT_{flag name}'
|
||||
env_flag = f"DBT_{flag}"
|
||||
env_value = os.getenv(env_flag)
|
||||
if env_value is not None and env_value != '':
|
||||
env_value = env_value.lower()
|
||||
# non Boolean values
|
||||
if flag in ['LOG_FORMAT', 'PRINTER_WIDTH', 'PROFILES_DIR']:
|
||||
flag_value = env_value
|
||||
else:
|
||||
flag_value = env_set_bool(env_value)
|
||||
elif user_config is not None and getattr(user_config, lc_flag, None) is not None:
|
||||
flag_value = getattr(user_config, lc_flag)
|
||||
else:
|
||||
flag_value = flag_defaults[flag]
|
||||
if flag == 'PRINTER_WIDTH': # printer_width must be an int or it hangs
|
||||
flag_value = int(flag_value)
|
||||
if flag == 'PROFILES_DIR':
|
||||
flag_value = os.path.abspath(flag_value)
|
||||
|
||||
return flag_value
|
||||
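The precedence implemented by get_flag_value (CLI argument, then DBT_* environment variable, then the user_config block, then flag_defaults) can be illustrated with a small standalone sketch. This is not dbt code: the resolve() helper and the sample config class are illustrative only, and the env-var lower-casing and bool coercion done by the real function are omitted.

import os

# Illustrative stand-alone version of the lookup order used above:
# CLI arg -> DBT_<FLAG> env var -> user_config attribute -> built-in default.
def resolve(flag, cli_value, user_config, defaults):
    if cli_value is not None:
        return cli_value
    env_value = os.getenv(f"DBT_{flag}")
    if env_value:
        return env_value
    user_value = getattr(user_config, flag.lower(), None)
    if user_value is not None:
        return user_value
    return defaults[flag]

# Example: no CLI value and no DBT_PRINTER_WIDTH env var set, so the default wins.
class FakeUserConfig:
    printer_width = None

print(resolve("PRINTER_WIDTH", None, FakeUserConfig(), {"PRINTER_WIDTH": 80}))  # 80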
def get_flag_dict():
|
||||
return {
|
||||
"use_experimental_parser": USE_EXPERIMENTAL_PARSER,
|
||||
"warn_error": WARN_ERROR,
|
||||
"write_json": WRITE_JSON,
|
||||
"partial_parse": PARTIAL_PARSE,
|
||||
"use_colors": USE_COLORS,
|
||||
"profiles_dir": PROFILES_DIR,
|
||||
"debug": DEBUG,
|
||||
"log_format": LOG_FORMAT,
|
||||
"version_check": VERSION_CHECK,
|
||||
"fail_fast": FAIL_FAST,
|
||||
"send_anonymous_usage_stats": SEND_ANONYMOUS_USAGE_STATS,
|
||||
"printer_width": PRINTER_WIDTH,
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# special support for CLI argument parsing.
|
||||
from dbt import flags
|
||||
import itertools
|
||||
from dbt.clients.yaml_helper import yaml, Loader, Dumper # noqa: F401
|
||||
|
||||
@@ -66,7 +67,7 @@ def parse_union_from_default(
|
||||
def parse_difference(
|
||||
include: Optional[List[str]], exclude: Optional[List[str]]
|
||||
) -> SelectionDifference:
|
||||
included = parse_union_from_default(include, DEFAULT_INCLUDES)
|
||||
included = parse_union_from_default(include, DEFAULT_INCLUDES, greedy=bool(flags.GREEDY))
|
||||
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
|
||||
return SelectionDifference(components=[included, excluded])
|
||||
|
||||
@@ -180,7 +181,7 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
|
||||
union_def_parts = _get_list_dicts(definition, 'union')
|
||||
include, exclude = _parse_include_exclude_subdefs(union_def_parts)
|
||||
|
||||
union = SelectionUnion(components=include)
|
||||
union = SelectionUnion(components=include, greedy_warning=False)
|
||||
|
||||
if exclude is None:
|
||||
union.raw = definition
|
||||
@@ -188,7 +189,8 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
|
||||
else:
|
||||
return SelectionDifference(
|
||||
components=[union, exclude],
|
||||
raw=definition
|
||||
raw=definition,
|
||||
greedy_warning=False
|
||||
)
|
||||
|
||||
|
||||
@@ -197,7 +199,7 @@ def parse_intersection_definition(
|
||||
) -> SelectionSpec:
|
||||
intersection_def_parts = _get_list_dicts(definition, 'intersection')
|
||||
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
|
||||
intersection = SelectionIntersection(components=include)
|
||||
intersection = SelectionIntersection(components=include, greedy_warning=False)
|
||||
|
||||
if exclude is None:
|
||||
intersection.raw = definition
|
||||
@@ -205,7 +207,8 @@ def parse_intersection_definition(
|
||||
else:
|
||||
return SelectionDifference(
|
||||
components=[intersection, exclude],
|
||||
raw=definition
|
||||
raw=definition,
|
||||
greedy_warning=False
|
||||
)
|
||||
|
||||
|
||||
@@ -239,7 +242,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
|
||||
if diff_arg is None:
|
||||
return base
|
||||
else:
|
||||
return SelectionDifference(components=[base, diff_arg])
|
||||
return SelectionDifference(components=[base, diff_arg], greedy_warning=False)
|
||||
|
||||
|
||||
def parse_from_definition(
|
||||
@@ -271,10 +274,12 @@ def parse_from_definition(
|
||||
|
||||
def parse_from_selectors_definition(
|
||||
source: SelectorFile
|
||||
) -> Dict[str, SelectionSpec]:
|
||||
result: Dict[str, SelectionSpec] = {}
|
||||
) -> Dict[str, Dict[str, Union[SelectionSpec, bool]]]:
|
||||
result: Dict[str, Dict[str, Union[SelectionSpec, bool]]] = {}
|
||||
selector: SelectorDefinition
|
||||
for selector in source.selectors:
|
||||
result[selector.name] = parse_from_definition(selector.definition,
|
||||
rootlevel=True)
|
||||
result[selector.name] = {
|
||||
"default": selector.default,
|
||||
"definition": parse_from_definition(selector.definition, rootlevel=True)
|
||||
}
|
||||
return result
|
||||
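For reference, after this change each named selector maps to a small dict rather than directly to a SelectionSpec. A hedged sketch of the resulting shape; the selector name and the spec placeholder below are made up for illustration.

# Hypothetical shape of the value returned by parse_from_selectors_definition
# after this change; "nightly_selector" and the placeholder object are not real.
parsed_selection_spec = object()  # stands in for the SelectionSpec built by parse_from_definition

result = {
    "nightly_selector": {
        "default": True,                      # from the selector's `default:` key
        "definition": parsed_selection_spec,  # the parsed SelectionSpec itself
    }
}
print(result["nightly_selector"]["default"])  # True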
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
from typing import Set, List, Optional, Tuple
|
||||
|
||||
from .graph import Graph, UniqueId
|
||||
@@ -30,6 +29,24 @@ def alert_non_existence(raw_spec, nodes):
|
||||
)
|
||||
|
||||
|
||||
def alert_unused_nodes(raw_spec, node_names):
|
||||
summary_nodes_str = ("\n - ").join(node_names[:3])
|
||||
debug_nodes_str = ("\n - ").join(node_names)
|
||||
and_more_str = f"\n - and {len(node_names) - 3} more" if len(node_names) > 4 else ""
|
||||
summary_msg = (
|
||||
f"\nSome tests were excluded because at least one parent is not selected. "
|
||||
f"Use the --greedy flag to include them."
|
||||
f"\n - {summary_nodes_str}{and_more_str}"
|
||||
)
|
||||
logger.info(summary_msg)
|
||||
if len(node_names) > 4:
|
||||
debug_msg = (
|
||||
f"Full list of tests that were excluded:"
|
||||
f"\n - {debug_nodes_str}"
|
||||
)
|
||||
logger.debug(debug_msg)
|
||||
|
||||
|
||||
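The summary/detail split in alert_unused_nodes is easiest to see with a dry run. The sketch below only reproduces the string-building logic so the truncation behavior is visible; it is not the dbt function itself, and the test names are invented.

# Stand-alone rerun of the truncation logic from alert_unused_nodes:
# show at most three names in the summary, and append "and N more" when
# more than four tests were excluded. The full list only goes to debug logging.
node_names = ["test_a", "test_b", "test_c", "test_d", "test_e"]

summary_nodes_str = ("\n  - ").join(node_names[:3])
and_more_str = f"\n  - and {len(node_names) - 3} more" if len(node_names) > 4 else ""
print(f"\nSome tests were excluded because at least one parent is not selected."
      f"\n  - {summary_nodes_str}{and_more_str}")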
def can_select_indirectly(node):
|
||||
"""If a node is not selected itself, but its parent(s) are, it may qualify
|
||||
for indirect selection.
|
||||
@@ -151,16 +168,16 @@ class NodeSelector(MethodManager):
|
||||
|
||||
return direct_nodes, indirect_nodes
|
||||
|
||||
def select_nodes(self, spec: SelectionSpec) -> Set[UniqueId]:
|
||||
def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
|
||||
"""Select the nodes in the graph according to the spec.
|
||||
|
||||
This is the main point of entry for turning a spec into a set of nodes:
|
||||
- Recurse through spec, select by criteria, combine by set operation
|
||||
- Return final (unfiltered) selection set
|
||||
"""
|
||||
|
||||
direct_nodes, indirect_nodes = self.select_nodes_recursively(spec)
|
||||
return direct_nodes
|
||||
indirect_only = indirect_nodes.difference(direct_nodes)
|
||||
return direct_nodes, indirect_only
|
||||
|
||||
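The new second return value is just the set difference between indirectly and directly selected nodes; a two-line illustration with made-up unique IDs:

# Made-up unique IDs, only to show the indirect_only computation above.
direct_nodes = {"model.proj.a", "model.proj.b", "test.proj.t1"}
indirect_nodes = {"test.proj.t1", "test.proj.t2"}

indirect_only = indirect_nodes.difference(direct_nodes)
print(indirect_only)  # {'test.proj.t2'} -- the candidates for the greedy warning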
def _is_graph_member(self, unique_id: UniqueId) -> bool:
|
||||
if unique_id in self.manifest.sources:
|
||||
@@ -213,6 +230,8 @@ class NodeSelector(MethodManager):
|
||||
# - If ANY parent is missing, return it separately. We'll keep it around
|
||||
# for later and see if its other parents show up.
|
||||
# We use this for INCLUSION.
|
||||
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
|
||||
# or by specifying `greedy: true` in a yaml selector
|
||||
|
||||
direct_nodes = set(selected)
|
||||
indirect_nodes = set()
|
||||
@@ -251,15 +270,24 @@ class NodeSelector(MethodManager):
|
||||
|
||||
- node selection. Based on the include/exclude sets, the set
|
||||
of matched unique IDs is returned
|
||||
- expand the graph at each leaf node, before combination
|
||||
- selectors might override this. for example, this is where
|
||||
tests are added
|
||||
- includes direct + indirect selection (for tests)
|
||||
- filtering:
|
||||
- selectors can filter the nodes after all of them have been
|
||||
selected
|
||||
"""
|
||||
selected_nodes = self.select_nodes(spec)
|
||||
selected_nodes, indirect_only = self.select_nodes(spec)
|
||||
filtered_nodes = self.filter_selection(selected_nodes)
|
||||
|
||||
if indirect_only:
|
||||
filtered_unused_nodes = self.filter_selection(indirect_only)
|
||||
if filtered_unused_nodes and spec.greedy_warning:
|
||||
# log anything that didn't make the cut
|
||||
unused_node_names = []
|
||||
for unique_id in filtered_unused_nodes:
|
||||
name = self.manifest.nodes[unique_id].name
|
||||
unused_node_names.append(name)
|
||||
alert_unused_nodes(spec, unused_node_names)
|
||||
|
||||
return filtered_nodes
|
||||
|
||||
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
|
||||
|
||||
@@ -405,27 +405,38 @@ class StateSelectorMethod(SelectorMethod):
|
||||
|
||||
return modified
|
||||
|
||||
def recursively_check_macros_modified(self, node):
|
||||
# check if there are any changes in macros the first time
|
||||
if self.modified_macros is None:
|
||||
self.modified_macros = self._macros_modified()
|
||||
|
||||
def recursively_check_macros_modified(self, node, previous_macros):
|
||||
# loop through all macros that this node depends on
|
||||
for macro_uid in node.depends_on.macros:
|
||||
# avoid infinite recursion if we've already seen this macro
|
||||
if macro_uid in previous_macros:
|
||||
continue
|
||||
previous_macros.append(macro_uid)
|
||||
# is this macro one of the modified macros?
|
||||
if macro_uid in self.modified_macros:
|
||||
return True
|
||||
# if not, and this macro depends on other macros, keep looping
|
||||
macro = self.manifest.macros[macro_uid]
|
||||
if len(macro.depends_on.macros) > 0:
|
||||
return self.recursively_check_macros_modified(macro)
|
||||
macro_node = self.manifest.macros[macro_uid]
|
||||
if len(macro_node.depends_on.macros) > 0:
|
||||
return self.recursively_check_macros_modified(macro_node, previous_macros)
|
||||
else:
|
||||
return False
|
||||
return False
|
||||
|
||||
def check_macros_modified(self, node):
|
||||
# check if there are any changes in macros the first time
|
||||
if self.modified_macros is None:
|
||||
self.modified_macros = self._macros_modified()
|
||||
# no macros have been modified, skip looping entirely
|
||||
if not self.modified_macros:
|
||||
return False
|
||||
# recursively loop through upstream macros to see if any is modified
|
||||
else:
|
||||
previous_macros = []
|
||||
return self.recursively_check_macros_modified(node, previous_macros)
|
||||
|
||||
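The previous_macros list threaded through recursively_check_macros_modified is what prevents infinite recursion when macros depend on each other in a cycle. Below is a simplified, self-contained sketch of the same idea, not dbt's implementation: plain dicts stand in for the manifest, and `seen` plays the role of previous_macros.

# Simplified traversal: depends_on maps a macro id to the macro ids it uses.
def any_upstream_modified(macro_id, depends_on, modified, seen=None):
    seen = set() if seen is None else seen
    for upstream in depends_on.get(macro_id, []):
        if upstream in seen:       # already visited: break the cycle
            continue
        seen.add(upstream)
        if upstream in modified:
            return True
        if any_upstream_modified(upstream, depends_on, modified, seen):
            return True
    return False

# a -> b -> a is a cycle; c is the only modified macro and is not upstream of a.
depends_on = {"a": ["b"], "b": ["a"]}
print(any_upstream_modified("a", depends_on, modified={"c"}))  # False, and it terminates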
def check_modified(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
|
||||
different_contents = not new.same_contents(old) # type: ignore
|
||||
upstream_macro_change = self.recursively_check_macros_modified(new)
|
||||
upstream_macro_change = self.check_macros_modified(new)
|
||||
return different_contents or upstream_macro_change
|
||||
|
||||
def check_modified_body(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
|
||||
@@ -457,7 +468,7 @@ class StateSelectorMethod(SelectorMethod):
|
||||
return False
|
||||
|
||||
def check_modified_macros(self, _, new: SelectorTarget) -> bool:
|
||||
return self.recursively_check_macros_modified(new)
|
||||
return self.check_macros_modified(new)
|
||||
|
||||
def check_new(self, old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
|
||||
return old is None
|
||||
|
||||
@@ -67,6 +67,7 @@ class SelectionCriteria:
|
||||
children: bool
|
||||
children_depth: Optional[int]
|
||||
greedy: bool = False
|
||||
greedy_warning: bool = False # do not raise warning for yaml selectors
|
||||
|
||||
def __post_init__(self):
|
||||
if self.children and self.childrens_parents:
|
||||
@@ -124,11 +125,11 @@ class SelectionCriteria:
|
||||
parents_depth=parents_depth,
|
||||
children=bool(dct.get('children')),
|
||||
children_depth=children_depth,
|
||||
greedy=greedy
|
||||
greedy=(greedy or bool(dct.get('greedy'))),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def dict_from_single_spec(cls, raw: str, greedy: bool = False):
|
||||
def dict_from_single_spec(cls, raw: str):
|
||||
result = RAW_SELECTOR_PATTERN.match(raw)
|
||||
if result is None:
|
||||
return {'error': 'Invalid selector spec'}
|
||||
@@ -145,6 +146,8 @@ class SelectionCriteria:
|
||||
dct['parents'] = bool(dct.get('parents'))
|
||||
if 'children' in dct:
|
||||
dct['children'] = bool(dct.get('children'))
|
||||
if 'greedy' in dct:
|
||||
dct['greedy'] = bool(dct.get('greedy'))
|
||||
return dct
|
||||
|
||||
@classmethod
|
||||
@@ -162,10 +165,12 @@ class BaseSelectionGroup(Iterable[SelectionSpec], metaclass=ABCMeta):
|
||||
self,
|
||||
components: Iterable[SelectionSpec],
|
||||
expect_exists: bool = False,
|
||||
greedy_warning: bool = True,
|
||||
raw: Any = None,
|
||||
):
|
||||
self.components: List[SelectionSpec] = list(components)
|
||||
self.expect_exists = expect_exists
|
||||
self.greedy_warning = greedy_warning
|
||||
self.raw = raw
|
||||
|
||||
def __iter__(self) -> Iterator[SelectionSpec]:
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
{% endmacro %}
|
||||
|
||||
{% macro get_batch_size() -%}
|
||||
{{ adapter.dispatch('get_batch_size', 'dbt')() }}
|
||||
{{ return(adapter.dispatch('get_batch_size', 'dbt')()) }}
|
||||
{%- endmacro %}
|
||||
|
||||
{% macro default__get_batch_size() %}
|
||||
|
||||
@@ -7,7 +7,7 @@ with all_values as (
|
||||
count(*) as n_records
|
||||
|
||||
from {{ model }}
|
||||
group by 1
|
||||
group by {{ column_name }}
|
||||
|
||||
)
|
||||
|
||||
|
||||
222
core/dbt/main.py
@@ -10,30 +10,30 @@ from pathlib import Path
|
||||
|
||||
import dbt.version
|
||||
import dbt.flags as flags
|
||||
import dbt.task.run as run_task
|
||||
import dbt.task.build as build_task
|
||||
import dbt.task.clean as clean_task
|
||||
import dbt.task.compile as compile_task
|
||||
import dbt.task.debug as debug_task
|
||||
import dbt.task.clean as clean_task
|
||||
import dbt.task.deps as deps_task
|
||||
import dbt.task.init as init_task
|
||||
import dbt.task.seed as seed_task
|
||||
import dbt.task.test as test_task
|
||||
import dbt.task.snapshot as snapshot_task
|
||||
import dbt.task.generate as generate_task
|
||||
import dbt.task.serve as serve_task
|
||||
import dbt.task.freshness as freshness_task
|
||||
import dbt.task.run_operation as run_operation_task
|
||||
import dbt.task.generate as generate_task
|
||||
import dbt.task.init as init_task
|
||||
import dbt.task.list as list_task
|
||||
import dbt.task.parse as parse_task
|
||||
import dbt.task.run as run_task
|
||||
import dbt.task.run_operation as run_operation_task
|
||||
import dbt.task.seed as seed_task
|
||||
import dbt.task.serve as serve_task
|
||||
import dbt.task.snapshot as snapshot_task
|
||||
import dbt.task.test as test_task
|
||||
from dbt.profiler import profiler
|
||||
from dbt.task.list import ListTask
|
||||
from dbt.task.rpc.server import RPCServerTask
|
||||
from dbt.adapters.factory import reset_adapters, cleanup_connections
|
||||
|
||||
import dbt.tracking
|
||||
|
||||
from dbt.utils import ExitCodes
|
||||
from dbt.config import PROFILES_DIR, read_user_config
|
||||
from dbt.config.profile import DEFAULT_PROFILES_DIR, read_user_config
|
||||
from dbt.exceptions import RuntimeException, InternalException
|
||||
|
||||
|
||||
@@ -160,17 +160,6 @@ def handle(args):
|
||||
return res
|
||||
|
||||
|
||||
def initialize_config_values(parsed):
|
||||
"""Given the parsed args, initialize the dbt tracking code.
|
||||
|
||||
It would be nice to re-use this profile later on instead of parsing it
|
||||
twice, but dbt's initialization is not structured in a way that makes that
|
||||
easy.
|
||||
"""
|
||||
cfg = read_user_config(parsed.profiles_dir)
|
||||
cfg.set_values(parsed.profiles_dir)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def adapter_management():
|
||||
reset_adapters()
|
||||
@@ -184,8 +173,15 @@ def handle_and_check(args):
|
||||
with log_manager.applicationbound():
|
||||
parsed = parse_args(args)
|
||||
|
||||
# we've parsed the args - we can now decide if we're debug or not
|
||||
if parsed.debug:
|
||||
# Set flags from args, user config, and env vars
|
||||
user_config = read_user_config(flags.PROFILES_DIR) # This is read again later
|
||||
flags.set_from_args(parsed, user_config)
|
||||
dbt.tracking.initialize_from_flags()
|
||||
# Set log_format from flags
|
||||
parsed.cls.set_log_format()
|
||||
|
||||
# we've parsed the args and set the flags - we can now decide if we're debug or not
|
||||
if flags.DEBUG:
|
||||
log_manager.set_debug()
|
||||
|
||||
profiler_enabled = False
|
||||
@@ -198,8 +194,6 @@ def handle_and_check(args):
|
||||
outfile=parsed.record_timing_info
|
||||
):
|
||||
|
||||
initialize_config_values(parsed)
|
||||
|
||||
with adapter_management():
|
||||
|
||||
task, res = run_from_args(parsed)
|
||||
@@ -233,15 +227,17 @@ def track_run(task):
|
||||
|
||||
def run_from_args(parsed):
|
||||
log_cache_events(getattr(parsed, 'log_cache_events', False))
|
||||
flags.set_from_args(parsed)
|
||||
|
||||
parsed.cls.pre_init_hook(parsed)
|
||||
# we can now use the logger for stdout
|
||||
# set log_format in the logger
|
||||
parsed.cls.pre_init_hook(parsed)
|
||||
|
||||
logger.info("Running with dbt{}".format(dbt.version.installed))
|
||||
|
||||
# this will convert DbtConfigErrors into RuntimeExceptions
|
||||
# task could be any one of the task objects
|
||||
task = parsed.cls.from_args(args=parsed)
|
||||
|
||||
logger.debug("running dbt with arguments {parsed}", parsed=str(parsed))
|
||||
|
||||
log_path = None
|
||||
@@ -275,11 +271,12 @@ def _build_base_subparser():
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--profiles-dir',
|
||||
default=PROFILES_DIR,
|
||||
default=None,
|
||||
dest='sub_profiles_dir', # Main cli arg precedes subcommand
|
||||
type=str,
|
||||
help='''
|
||||
Which directory to look in for the profiles.yml file. Default = {}
|
||||
'''.format(PROFILES_DIR)
|
||||
'''.format(DEFAULT_PROFILES_DIR)
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
@@ -319,15 +316,6 @@ def _build_base_subparser():
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
|
||||
base_subparser.add_argument(
|
||||
'--bypass-cache',
|
||||
action='store_false',
|
||||
dest='use_cache',
|
||||
help='''
|
||||
If set, bypass the adapter-level cache of database state
|
||||
''',
|
||||
)
|
||||
|
||||
base_subparser.set_defaults(defer=None, state=None)
|
||||
return base_subparser
|
||||
|
||||
@@ -394,11 +382,46 @@ def _build_build_subparser(subparsers, base_subparser):
|
||||
sub.add_argument(
|
||||
'-x',
|
||||
'--fail-fast',
|
||||
dest='sub_fail_fast',
|
||||
action='store_true',
|
||||
help='''
|
||||
Stop execution upon a first failure.
|
||||
'''
|
||||
)
|
||||
sub.add_argument(
|
||||
'--store-failures',
|
||||
action='store_true',
|
||||
help='''
|
||||
Store test results (failing rows) in the database
|
||||
'''
|
||||
)
|
||||
sub.add_argument(
|
||||
'--greedy',
|
||||
action='store_true',
|
||||
help='''
|
||||
Select all tests that touch the selected resources,
|
||||
even if they also depend on unselected resources
|
||||
'''
|
||||
)
|
||||
resource_values: List[str] = [
|
||||
str(s) for s in build_task.BuildTask.ALL_RESOURCE_VALUES
|
||||
] + ['all']
|
||||
sub.add_argument('--resource-type',
|
||||
choices=resource_values,
|
||||
action='append',
|
||||
default=[],
|
||||
dest='resource_types')
|
||||
# explicitly don't support --models
|
||||
sub.add_argument(
|
||||
'-s',
|
||||
'--select',
|
||||
dest='select',
|
||||
nargs='+',
|
||||
help='''
|
||||
Specify the nodes to include.
|
||||
''',
|
||||
)
|
||||
_add_common_selector_arguments(sub)
|
||||
return sub
|
||||
|
||||
|
||||
@@ -497,6 +520,7 @@ def _build_run_subparser(subparsers, base_subparser):
|
||||
run_sub.add_argument(
|
||||
'-x',
|
||||
'--fail-fast',
|
||||
dest='sub_fail_fast',
|
||||
action='store_true',
|
||||
help='''
|
||||
Stop execution upon a first failure.
|
||||
@@ -611,7 +635,7 @@ def _add_table_mutability_arguments(*subparsers):
|
||||
'--full-refresh',
|
||||
action='store_true',
|
||||
help='''
|
||||
If specified, DBT will drop incremental models and
|
||||
If specified, dbt will drop incremental models and
|
||||
fully-recalculate the incremental table from the model definition.
|
||||
'''
|
||||
)
|
||||
@@ -620,8 +644,9 @@ def _add_table_mutability_arguments(*subparsers):
|
||||
def _add_version_check(sub):
|
||||
sub.add_argument(
|
||||
'--no-version-check',
|
||||
dest='version_check',
|
||||
dest='sub_version_check', # main cli arg precedes subcommands
|
||||
action='store_false',
|
||||
default=None,
|
||||
help='''
|
||||
If set, skip ensuring dbt's version matches the one specified in
|
||||
the dbt_project.yml file ('require-dbt-version')
|
||||
@@ -715,6 +740,7 @@ def _build_test_subparser(subparsers, base_subparser):
|
||||
sub.add_argument(
|
||||
'-x',
|
||||
'--fail-fast',
|
||||
dest='sub_fail_fast',
|
||||
action='store_true',
|
||||
help='''
|
||||
Stop execution upon a first test failure.
|
||||
@@ -727,6 +753,14 @@ def _build_test_subparser(subparsers, base_subparser):
|
||||
Store test results (failing rows) in the database
|
||||
'''
|
||||
)
|
||||
sub.add_argument(
|
||||
'--greedy',
|
||||
action='store_true',
|
||||
help='''
|
||||
Select all tests that touch the selected resources,
|
||||
even if they also depend on unselected resources
|
||||
'''
|
||||
)
|
||||
|
||||
sub.set_defaults(cls=test_task.TestTask, which='test', rpc_method='test')
|
||||
return sub
|
||||
@@ -815,9 +849,9 @@ def _build_list_subparser(subparsers, base_subparser):
|
||||
''',
|
||||
aliases=['ls'],
|
||||
)
|
||||
sub.set_defaults(cls=ListTask, which='list', rpc_method=None)
|
||||
sub.set_defaults(cls=list_task.ListTask, which='list', rpc_method=None)
|
||||
resource_values: List[str] = [
|
||||
str(s) for s in ListTask.ALL_RESOURCE_VALUES
|
||||
str(s) for s in list_task.ListTask.ALL_RESOURCE_VALUES
|
||||
] + ['default', 'all']
|
||||
sub.add_argument('--resource-type',
|
||||
choices=resource_values,
|
||||
@@ -852,6 +886,14 @@ def _build_list_subparser(subparsers, base_subparser):
|
||||
metavar='SELECTOR',
|
||||
required=False,
|
||||
)
|
||||
sub.add_argument(
|
||||
'--greedy',
|
||||
action='store_true',
|
||||
help='''
|
||||
Select all tests that touch the selected resources,
|
||||
even if they also depend on unselected resources
|
||||
'''
|
||||
)
|
||||
_add_common_selector_arguments(sub)
|
||||
|
||||
return sub
|
||||
@@ -922,6 +964,7 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
'-d',
|
||||
'--debug',
|
||||
action='store_true',
|
||||
default=None,
|
||||
help='''
|
||||
Display debug logging during dbt execution. Useful for debugging and
|
||||
making bug reports.
|
||||
@@ -931,13 +974,14 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
p.add_argument(
|
||||
'--log-format',
|
||||
choices=['text', 'json', 'default'],
|
||||
default='default',
|
||||
default=None,
|
||||
help='''Specify the log format, overriding the command's default.'''
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'--no-write-json',
|
||||
action='store_false',
|
||||
default=None,
|
||||
dest='write_json',
|
||||
help='''
|
||||
If set, skip writing the manifest and run_results.json files to disk
|
||||
@@ -948,6 +992,7 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
'--use-colors',
|
||||
action='store_const',
|
||||
const=True,
|
||||
default=None,
|
||||
dest='use_colors',
|
||||
help='''
|
||||
Colorize the output DBT prints to the terminal. Output is colorized by
|
||||
@@ -969,18 +1014,17 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'-S',
|
||||
'--strict',
|
||||
action='store_true',
|
||||
'--printer-width',
|
||||
dest='printer_width',
|
||||
help='''
|
||||
Run schema validations at runtime. This will surface bugs in dbt, but
|
||||
may incur a performance penalty.
|
||||
Sets the width of terminal output
|
||||
'''
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'--warn-error',
|
||||
action='store_true',
|
||||
default=None,
|
||||
help='''
|
||||
If dbt would normally warn, instead raise an exception. Examples
|
||||
include --models that selects nothing, deprecations, configurations
|
||||
@@ -989,6 +1033,17 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
'''
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'--no-version-check',
|
||||
dest='version_check',
|
||||
action='store_false',
|
||||
default=None,
|
||||
help='''
|
||||
If set, skip ensuring dbt's version matches the one specified in
|
||||
the dbt_project.yml file ('require-dbt-version')
|
||||
'''
|
||||
)
|
||||
|
||||
p.add_optional_argument_inverse(
|
||||
'--partial-parse',
|
||||
enable_help='''
|
||||
@@ -1011,26 +1066,48 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
help=argparse.SUPPRESS,
|
||||
)
|
||||
|
||||
# if set, extract all models and blocks with the jinja block extractor, and
|
||||
# verify that we don't fail anywhere the actual jinja parser passes. The
|
||||
# reverse (passing files that ends up failing jinja) is fine.
|
||||
# TODO remove?
|
||||
p.add_argument(
|
||||
'--test-new-parser',
|
||||
action='store_true',
|
||||
help=argparse.SUPPRESS
|
||||
)
|
||||
|
||||
# if set, will use the tree-sitter-jinja2 parser and extractor instead of
|
||||
# jinja rendering when possible.
|
||||
p.add_argument(
|
||||
'--use-experimental-parser',
|
||||
action='store_true',
|
||||
default=None,
|
||||
help='''
|
||||
Uses an experimental parser to extract jinja values.
|
||||
'''
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'--profiles-dir',
|
||||
default=None,
|
||||
dest='profiles_dir',
|
||||
type=str,
|
||||
help='''
|
||||
Which directory to look in for the profiles.yml file. Default = {}
|
||||
'''.format(DEFAULT_PROFILES_DIR)
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'--no-anonymous-usage-stats',
|
||||
action='store_false',
|
||||
default=None,
|
||||
dest='send_anonymous_usage_stats',
|
||||
help='''
|
||||
Do not send anonymous usage stats to dbt Labs
|
||||
'''
|
||||
)
|
||||
|
||||
p.add_argument(
|
||||
'-x',
|
||||
'--fail-fast',
|
||||
dest='fail_fast',
|
||||
action='store_true',
|
||||
default=None,
|
||||
help='''
|
||||
Stop execution upon a first failure.
|
||||
'''
|
||||
)
|
||||
|
||||
subs = p.add_subparsers(title="Available sub-commands")
|
||||
|
||||
base_subparser = _build_base_subparser()
|
||||
@@ -1062,7 +1139,7 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
# --select, --exclude
|
||||
# list_sub sets up its own arguments.
|
||||
_add_selection_arguments(
|
||||
build_sub, run_sub, compile_sub, generate_sub, test_sub, snapshot_sub, seed_sub)
|
||||
run_sub, compile_sub, generate_sub, test_sub, snapshot_sub, seed_sub)
|
||||
# --defer
|
||||
_add_defer_argument(run_sub, test_sub, build_sub)
|
||||
# --full-refresh
|
||||
@@ -1078,8 +1155,31 @@ def parse_args(args, cls=DBTArgumentParser):
|
||||
|
||||
parsed = p.parse_args(args)
|
||||
|
||||
# profiles_dir is set before subcommands and after, so normalize
|
||||
if hasattr(parsed, 'sub_profiles_dir'):
|
||||
if parsed.sub_profiles_dir is not None:
|
||||
parsed.profiles_dir = parsed.sub_profiles_dir
|
||||
delattr(parsed, 'sub_profiles_dir')
|
||||
if hasattr(parsed, 'profiles_dir'):
|
||||
parsed.profiles_dir = os.path.abspath(parsed.profiles_dir)
|
||||
if parsed.profiles_dir is None:
|
||||
parsed.profiles_dir = flags.PROFILES_DIR
|
||||
else:
|
||||
parsed.profiles_dir = os.path.abspath(parsed.profiles_dir)
|
||||
# needs to be set before the other flags, because it's needed to
|
||||
# read the profile that contains them
|
||||
flags.PROFILES_DIR = parsed.profiles_dir
|
||||
|
||||
# version_check is set before subcommands and after, so normalize
|
||||
if hasattr(parsed, 'sub_version_check'):
|
||||
if parsed.sub_version_check is False:
|
||||
parsed.version_check = False
|
||||
delattr(parsed, 'sub_version_check')
|
||||
|
||||
# fail_fast is set before subcommands and after, so normalize
|
||||
if hasattr(parsed, 'sub_fail_fast'):
|
||||
if parsed.sub_fail_fast is True:
|
||||
parsed.fail_fast = True
|
||||
delattr(parsed, 'sub_fail_fast')
|
||||
|
||||
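The sub_* normalization above exists because the same flag can be accepted both before and after the subcommand; argparse keeps them as separate destinations, and parse_args then folds the subcommand value back into the main one. A minimal argparse sketch of that pattern, with a toy parser rather than dbt's full CLI:

import argparse

# Toy parser: --fail-fast is accepted globally and again after the subcommand,
# under a different dest, then normalized the way parse_args does above.
p = argparse.ArgumentParser()
p.add_argument('-x', '--fail-fast', dest='fail_fast', action='store_true', default=None)
subs = p.add_subparsers()
run = subs.add_parser('run')
run.add_argument('-x', '--fail-fast', dest='sub_fail_fast', action='store_true')

parsed = p.parse_args(['run', '-x'])
if getattr(parsed, 'sub_fail_fast', False):
    parsed.fail_fast = True
print(parsed.fail_fast)  # True, even though the global flag was never passed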
if getattr(parsed, 'project_dir', None) is not None:
|
||||
expanded_user = os.path.expanduser(parsed.project_dir)
|
||||
|
||||
@@ -72,10 +72,13 @@ class HookParser(SimpleParser[HookBlock, ParsedHookNode]):
|
||||
|
||||
# Hooks are only in the dbt_project.yml file for the project
|
||||
def get_path(self) -> FilePath:
|
||||
# There ought to be an existing file object for this, but
|
||||
# until that is implemented use a dummy modification time
|
||||
path = FilePath(
|
||||
project_root=self.project.project_root,
|
||||
searched_path='.',
|
||||
relative_path='dbt_project.yml',
|
||||
modification_time=0.0,
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
@@ -64,7 +64,6 @@ from dbt.dataclass_schema import StrEnum, dbtClassMixin
|
||||
|
||||
PARTIAL_PARSE_FILE_NAME = 'partial_parse.msgpack'
|
||||
PARSING_STATE = DbtProcessState('parsing')
|
||||
DEFAULT_PARTIAL_PARSE = False
|
||||
|
||||
|
||||
class ReparseReason(StrEnum):
|
||||
@@ -203,8 +202,11 @@ class ManifestLoader:
|
||||
# used to get the SourceFiles from the manifest files.
|
||||
start_read_files = time.perf_counter()
|
||||
project_parser_files = {}
|
||||
saved_files = {}
|
||||
if self.saved_manifest:
|
||||
saved_files = self.saved_manifest.files
|
||||
for project in self.all_projects.values():
|
||||
read_files(project, self.manifest.files, project_parser_files)
|
||||
read_files(project, self.manifest.files, project_parser_files, saved_files)
|
||||
self._perf_info.path_count = len(self.manifest.files)
|
||||
self._perf_info.read_files_elapsed = (time.perf_counter() - start_read_files)
|
||||
|
||||
@@ -262,7 +264,7 @@ class ManifestLoader:
|
||||
self.manifest._parsing_info = ParsingInfo()
|
||||
|
||||
if skip_parsing:
|
||||
logger.info("Partial parsing enabled, no changes found, skipping parsing")
|
||||
logger.debug("Partial parsing enabled, no changes found, skipping parsing")
|
||||
else:
|
||||
# Load Macros
|
||||
# We need to parse the macros first, so they're resolvable when
|
||||
@@ -423,7 +425,7 @@ class ManifestLoader:
|
||||
if not self.partially_parsing and HookParser in parser_types:
|
||||
hook_parser = HookParser(project, self.manifest, self.root_project)
|
||||
path = hook_parser.get_path()
|
||||
file = load_source_file(path, ParseFileType.Hook, project.project_name)
|
||||
file = load_source_file(path, ParseFileType.Hook, project.project_name, {})
|
||||
if file:
|
||||
file_block = FileBlock(file)
|
||||
hook_parser.parse_file(file_block)
|
||||
@@ -536,18 +538,8 @@ class ManifestLoader:
|
||||
reparse_reason = ReparseReason.project_config_changed
|
||||
return valid, reparse_reason
|
||||
|
||||
def _partial_parse_enabled(self):
|
||||
# if the CLI is set, follow that
|
||||
if flags.PARTIAL_PARSE is not None:
|
||||
return flags.PARTIAL_PARSE
|
||||
# if the config is set, follow that
|
||||
elif self.root_project.config.partial_parse is not None:
|
||||
return self.root_project.config.partial_parse
|
||||
else:
|
||||
return DEFAULT_PARTIAL_PARSE
|
||||
|
||||
def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
|
||||
if not self._partial_parse_enabled():
|
||||
if not flags.PARTIAL_PARSE:
|
||||
logger.debug('Partial parsing not enabled')
|
||||
return None
|
||||
path = os.path.join(self.root_project.target_path,
|
||||
@@ -574,7 +566,7 @@ class ManifestLoader:
|
||||
)
|
||||
reparse_reason = ReparseReason.load_file_failure
|
||||
else:
|
||||
logger.info(f"Unable to do partial parsing because {path} not found")
|
||||
logger.info("Partial parse save file not found. Starting full parse.")
|
||||
reparse_reason = ReparseReason.file_not_found
|
||||
|
||||
# this event is only fired if a full reparse is needed
|
||||
@@ -584,7 +576,7 @@ class ManifestLoader:
|
||||
|
||||
def build_perf_info(self):
|
||||
mli = ManifestLoaderInfo(
|
||||
is_partial_parse_enabled=self._partial_parse_enabled(),
|
||||
is_partial_parse_enabled=flags.PARTIAL_PARSE,
|
||||
is_static_analysis_enabled=flags.USE_EXPERIMENTAL_PARSER
|
||||
)
|
||||
for project in self.all_projects.values():
|
||||
@@ -615,7 +607,7 @@ class ManifestLoader:
|
||||
])
|
||||
)
|
||||
|
||||
profile_path = os.path.join(config.args.profiles_dir, 'profiles.yml')
|
||||
profile_path = os.path.join(flags.PROFILES_DIR, 'profiles.yml')
|
||||
with open(profile_path) as fp:
|
||||
profile_hash = FileHash.from_contents(fp.read())
|
||||
|
||||
@@ -648,7 +640,7 @@ class ManifestLoader:
|
||||
macro_parser = MacroParser(project, self.manifest)
|
||||
for path in macro_parser.get_paths():
|
||||
source_file = load_source_file(
|
||||
path, ParseFileType.Macro, project.project_name)
|
||||
path, ParseFileType.Macro, project.project_name, {})
|
||||
block = FileBlock(source_file)
|
||||
# This does not add the file to the manifest.files,
|
||||
# but that shouldn't be necessary here.
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
from dbt.context.context_config import ContextConfig
|
||||
from dbt.contracts.graph.parsed import ParsedModelNode
|
||||
import dbt.flags as flags
|
||||
import dbt.tracking
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.parser.base import SimpleSQLParser
|
||||
from dbt.parser.search import FileBlock
|
||||
import dbt.tracking as tracking
|
||||
from dbt import utils
|
||||
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
|
||||
from functools import reduce
|
||||
from itertools import chain
|
||||
import random
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
|
||||
|
||||
class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
@@ -26,32 +28,52 @@ class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
def get_compiled_path(cls, block: FileBlock):
|
||||
return block.path.relative_path
|
||||
|
||||
# TODO when this is turned on by default, simplify the nasty if/else tree inside this method.
|
||||
def render_update(
|
||||
self, node: ParsedModelNode, config: ContextConfig
|
||||
) -> None:
|
||||
self.manifest._parsing_info.static_analysis_path_count += 1
|
||||
# TODO go back to 1/100 when this is turned on by default.
|
||||
# `True` roughly 1/50 times this function is called
|
||||
sample: bool = random.randint(1, 51) == 50
|
||||
|
||||
# `True` roughly 1/100 times this function is called
|
||||
sample: bool = random.randint(1, 101) == 100
|
||||
# top-level declaration of variables
|
||||
experimentally_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
|
||||
config_call_dict: Dict[str, Any] = {}
|
||||
source_calls: List[List[str]] = []
|
||||
|
||||
# run the experimental parser if the flag is on or if we're sampling
|
||||
if flags.USE_EXPERIMENTAL_PARSER or sample:
|
||||
try:
|
||||
experimentally_parsed: Dict[str, List[Any]] = py_extract_from_source(node.raw_sql)
|
||||
if self._has_banned_macro(node):
|
||||
# this log line is used for integration testing. If you change
|
||||
# the code at the beginning of the line change the tests in
|
||||
# test/integration/072_experimental_parser_tests/test_all_experimental_parser.py
|
||||
logger.debug(
|
||||
f"1601: parser fallback to jinja because of macro override for {node.path}"
|
||||
)
|
||||
experimentally_parsed = "has_banned_macro"
|
||||
else:
|
||||
# run the experimental parser and return the results
|
||||
try:
|
||||
experimentally_parsed = py_extract_from_source(
|
||||
node.raw_sql
|
||||
)
|
||||
logger.debug(f"1699: statically parsed {node.path}")
|
||||
# if we want information on what features are barring the experimental
|
||||
# parser from reading model files, this is where we would add that
|
||||
# since that information is stored in the `ExtractionError`.
|
||||
except ExtractionError:
|
||||
experimentally_parsed = "cannot_parse"
|
||||
|
||||
# second config format
|
||||
config_call_dict: Dict[str, Any] = {}
|
||||
for c in experimentally_parsed['configs']:
|
||||
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
|
||||
# if the parser succeeded, extract some data in easy-to-compare formats
|
||||
if isinstance(experimentally_parsed, dict):
|
||||
# create second config format
|
||||
for c in experimentally_parsed['configs']:
|
||||
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
|
||||
|
||||
# format sources TODO change extractor to match this type
|
||||
source_calls: List[List[str]] = []
|
||||
for s in experimentally_parsed['sources']:
|
||||
source_calls.append([s[0], s[1]])
|
||||
experimentally_parsed['sources'] = source_calls
|
||||
|
||||
except ExtractionError as e:
|
||||
experimentally_parsed = e
|
||||
# format sources TODO change extractor to match this type
|
||||
for s in experimentally_parsed['sources']:
|
||||
source_calls.append([s[0], s[1]])
|
||||
experimentally_parsed['sources'] = source_calls
|
||||
|
||||
# normal dbt run
|
||||
if not flags.USE_EXPERIMENTAL_PARSER:
|
||||
@@ -59,57 +81,19 @@ class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
super().render_update(node, config)
|
||||
# if we're sampling, compare for correctness
|
||||
if sample:
|
||||
result: List[str] = []
|
||||
# experimental parser couldn't parse
|
||||
if isinstance(experimentally_parsed, Exception):
|
||||
result += ["01_experimental_parser_cannot_parse"]
|
||||
else:
|
||||
# look for false positive configs
|
||||
for k in config_call_dict.keys():
|
||||
if k not in config._config_call_dict:
|
||||
result += ["02_false_positive_config_value"]
|
||||
break
|
||||
|
||||
# look for missed configs
|
||||
for k in config._config_call_dict.keys():
|
||||
if k not in config_call_dict:
|
||||
result += ["03_missed_config_value"]
|
||||
break
|
||||
|
||||
# look for false positive sources
|
||||
for s in experimentally_parsed['sources']:
|
||||
if s not in node.sources:
|
||||
result += ["04_false_positive_source_value"]
|
||||
break
|
||||
|
||||
# look for missed sources
|
||||
for s in node.sources:
|
||||
if s not in experimentally_parsed['sources']:
|
||||
result += ["05_missed_source_value"]
|
||||
break
|
||||
|
||||
# look for false positive refs
|
||||
for r in experimentally_parsed['refs']:
|
||||
if r not in node.refs:
|
||||
result += ["06_false_positive_ref_value"]
|
||||
break
|
||||
|
||||
# look for missed refs
|
||||
for r in node.refs:
|
||||
if r not in experimentally_parsed['refs']:
|
||||
result += ["07_missed_ref_value"]
|
||||
break
|
||||
|
||||
# if there are no errors, return a success value
|
||||
if not result:
|
||||
result = ["00_exact_match"]
|
||||
|
||||
result = _get_sample_result(
|
||||
experimentally_parsed,
|
||||
config_call_dict,
|
||||
source_calls,
|
||||
node,
|
||||
config
|
||||
)
|
||||
# fire a tracking event. this fires one event for every sample
|
||||
# so that we have data on a per file basis. Not only can we expect
|
||||
# no false positives or misses, we can expect the number model
|
||||
# files parseable by the experimental parser to match our internal
|
||||
# testing.
|
||||
if dbt.tracking.active_user is not None: # None in some tests
|
||||
if tracking.active_user is not None: # None in some tests
|
||||
tracking.track_experimental_parser_sample({
|
||||
"project_id": self.root_project.hashed_name(),
|
||||
"file_id": utils.get_hash(node),
|
||||
@@ -117,7 +101,7 @@ class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
})
|
||||
|
||||
# if the --use-experimental-parser flag was set, and the experimental parser succeeded
|
||||
elif not isinstance(experimentally_parsed, Exception):
|
||||
elif isinstance(experimentally_parsed, Dict):
|
||||
# since it doesn't need python jinja, fit the refs, sources, and configs
|
||||
# into the node. Down the line the rest of the node will be updated with
|
||||
# this information. (e.g. depends_on etc.)
|
||||
@@ -141,7 +125,102 @@ class ModelParser(SimpleSQLParser[ParsedModelNode]):
|
||||
|
||||
self.manifest._parsing_info.static_analysis_parsed_path_count += 1
|
||||
|
||||
# the experimental parser tried and failed on this model.
|
||||
# the experimental parser didn't run on this model.
|
||||
# fall back to python jinja rendering.
|
||||
elif experimentally_parsed in ["has_banned_macro"]:
|
||||
# not logging here since the reason should have been logged above
|
||||
super().render_update(node, config)
|
||||
# the experimental parser ran on this model and failed.
|
||||
# fall back to python jinja rendering.
|
||||
else:
|
||||
logger.debug(
|
||||
f"1602: parser fallback to jinja because of extractor failure for {node.path}"
|
||||
)
|
||||
super().render_update(node, config)
|
||||
|
||||
# checks for banned macros
|
||||
def _has_banned_macro(
|
||||
self, node: ParsedModelNode
|
||||
) -> bool:
|
||||
# first check if there is a banned macro defined in scope for this model file
|
||||
root_project_name = self.root_project.project_name
|
||||
project_name = node.package_name
|
||||
banned_macros = ['ref', 'source', 'config']
|
||||
|
||||
all_banned_macro_keys: Iterator[str] = chain.from_iterable(
|
||||
map(
|
||||
lambda name: [
|
||||
f"macro.{project_name}.{name}",
|
||||
f"macro.{root_project_name}.{name}"
|
||||
],
|
||||
banned_macros
|
||||
)
|
||||
)
|
||||
|
||||
return reduce(
|
||||
lambda z, key: z or (key in self.manifest.macros),
|
||||
all_banned_macro_keys,
|
||||
False
|
||||
)
|
||||
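The chain/map/reduce in _has_banned_macro is checking whether any of ref, source, or config has a user-space override in either the node's package or the root project. An equivalent, easier-to-read formulation; this is a restatement under the same assumptions, not the code dbt ships:

# Equivalent membership check: build the candidate macro unique_ids and
# ask whether any of them exists in manifest.macros.
def has_banned_macro(manifest_macros, project_name, root_project_name):
    banned = ('ref', 'source', 'config')
    candidate_keys = (
        f"macro.{pkg}.{name}"
        for name in banned
        for pkg in (project_name, root_project_name)
    )
    return any(key in manifest_macros for key in candidate_keys)

# Example with a fake manifest dict: the user's project overrides `ref`.
macros = {"macro.my_project.ref": object()}
print(has_banned_macro(macros, "my_project", "my_project"))  # True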
# returns a list of string codes to be sent as a tracking event
|
||||
def _get_sample_result(
|
||||
sample_output: Optional[Union[str, Dict[str, Any]]],
|
||||
config_call_dict: Dict[str, Any],
|
||||
source_calls: List[List[str]],
|
||||
node: ParsedModelNode,
|
||||
config: ContextConfig
|
||||
) -> List[str]:
|
||||
result: List[str] = []
|
||||
# experimental parser didn't run
|
||||
if sample_output is None:
|
||||
result += ["09_experimental_parser_skipped"]
|
||||
# experimental parser couldn't parse
|
||||
elif (isinstance(sample_output, str)):
|
||||
if sample_output == "cannot_parse":
|
||||
result += ["01_experimental_parser_cannot_parse"]
|
||||
elif sample_output == "has_banned_macro":
|
||||
result += ["08_has_banned_macro"]
|
||||
else:
|
||||
# look for false positive configs
|
||||
for k in config_call_dict.keys():
|
||||
if k not in config._config_call_dict:
|
||||
result += ["02_false_positive_config_value"]
|
||||
break
|
||||
|
||||
# look for missed configs
|
||||
for k in config._config_call_dict.keys():
|
||||
if k not in config_call_dict:
|
||||
result += ["03_missed_config_value"]
|
||||
break
|
||||
|
||||
# look for false positive sources
|
||||
for s in sample_output['sources']:
|
||||
if s not in node.sources:
|
||||
result += ["04_false_positive_source_value"]
|
||||
break
|
||||
|
||||
# look for missed sources
|
||||
for s in node.sources:
|
||||
if s not in sample_output['sources']:
|
||||
result += ["05_missed_source_value"]
|
||||
break
|
||||
|
||||
# look for false positive refs
|
||||
for r in sample_output['refs']:
|
||||
if r not in node.refs:
|
||||
result += ["06_false_positive_ref_value"]
|
||||
break
|
||||
|
||||
# look for missed refs
|
||||
for r in node.refs:
|
||||
if r not in sample_output['refs']:
|
||||
result += ["07_missed_ref_value"]
|
||||
break
|
||||
|
||||
# if there are no errors, return a success value
|
||||
if not result:
|
||||
result = ["00_exact_match"]
|
||||
|
||||
return result
|
||||
|
||||
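A quick note on reading _get_sample_result: the early exits return exactly one code, while the comparison branch can accumulate several. The calls below are hypothetical; the module path is a guess (the function is defined above in the model parser module), and the unused arguments are passed as empty placeholders since those branches never touch them.

# Hypothetical usage, assuming _get_sample_result as defined above is importable
# from the model parser module (path guessed here as dbt.parser.models).
from dbt.parser.models import _get_sample_result

print(_get_sample_result(None, {}, [], None, None))                 # ['09_experimental_parser_skipped']
print(_get_sample_result("cannot_parse", {}, [], None, None))       # ['01_experimental_parser_cannot_parse']
print(_get_sample_result("has_banned_macro", {}, [], None, None))   # ['08_has_banned_macro']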
@@ -105,10 +105,10 @@ class PartialParsing:
|
||||
}
|
||||
if changed_or_deleted_macro_file:
|
||||
self.macro_child_map = self.saved_manifest.build_macro_child_map()
|
||||
logger.info(f"Partial parsing enabled: "
|
||||
f"{len(deleted) + len(deleted_schema_files)} files deleted, "
|
||||
f"{len(added)} files added, "
|
||||
f"{len(changed) + len(changed_schema_files)} files changed.")
|
||||
logger.debug(f"Partial parsing enabled: "
|
||||
f"{len(deleted) + len(deleted_schema_files)} files deleted, "
|
||||
f"{len(added)} files added, "
|
||||
f"{len(changed) + len(changed_schema_files)} files changed.")
|
||||
self.file_diff = file_diff
|
||||
|
||||
# generate the list of files that need parsing
|
||||
|
||||
@@ -12,13 +12,27 @@ from typing import Optional
|
||||
# This loads the files contents and creates the SourceFile object
|
||||
def load_source_file(
|
||||
path: FilePath, parse_file_type: ParseFileType,
|
||||
project_name: str) -> Optional[AnySourceFile]:
|
||||
file_contents = load_file_contents(path.absolute_path, strip=False)
|
||||
checksum = FileHash.from_contents(file_contents)
|
||||
project_name: str, saved_files,) -> Optional[AnySourceFile]:
|
||||
|
||||
sf_cls = SchemaSourceFile if parse_file_type == ParseFileType.Schema else SourceFile
|
||||
source_file = sf_cls(path=path, checksum=checksum,
|
||||
source_file = sf_cls(path=path, checksum=FileHash.empty(),
|
||||
parse_file_type=parse_file_type, project_name=project_name)
|
||||
source_file.contents = file_contents.strip()
|
||||
|
||||
skip_loading_schema_file = False
|
||||
if (parse_file_type == ParseFileType.Schema and
|
||||
saved_files and source_file.file_id in saved_files):
|
||||
old_source_file = saved_files[source_file.file_id]
|
||||
if (source_file.path.modification_time != 0.0 and
|
||||
old_source_file.path.modification_time == source_file.path.modification_time):
|
||||
source_file.checksum = old_source_file.checksum
|
||||
source_file.dfy = old_source_file.dfy
|
||||
skip_loading_schema_file = True
|
||||
|
||||
if not skip_loading_schema_file:
|
||||
file_contents = load_file_contents(path.absolute_path, strip=False)
|
||||
source_file.checksum = FileHash.from_contents(file_contents)
|
||||
source_file.contents = file_contents.strip()
|
||||
|
||||
if parse_file_type == ParseFileType.Schema and source_file.contents:
|
||||
dfy = yaml_from_file(source_file)
|
||||
if dfy:
|
||||
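The new saved_files path skips re-reading and re-hashing a schema file when its recorded modification time matches the one already in the saved manifest; only then are the checksum and pre-parsed yaml (dfy) copied over. A stripped-down sketch of that decision, using stand-in objects rather than dbt's SourceFile:

# Stand-in objects; only the mtime/checksum reuse decision is reproduced here.
class FakeFile:
    def __init__(self, file_id, mtime, checksum=None, dfy=None):
        self.file_id, self.mtime, self.checksum, self.dfy = file_id, mtime, checksum, dfy

def maybe_reuse(new_file, saved_files):
    old = saved_files.get(new_file.file_id)
    if old and new_file.mtime != 0.0 and old.mtime == new_file.mtime:
        # same modification time as the last parse: reuse checksum and parsed
        # yaml, and skip reading the file contents entirely
        new_file.checksum, new_file.dfy = old.checksum, old.dfy
        return True
    return False  # fall through to load_file_contents + FileHash.from_contents

saved = {"models/schema.yml": FakeFile("models/schema.yml", 1700000000.0, "abc123", {"version": 2})}
print(maybe_reuse(FakeFile("models/schema.yml", 1700000000.0), saved))  # True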
@@ -69,7 +83,7 @@ def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
|
||||
|
||||
# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
|
||||
# them into a bunch of FileSource objects
|
||||
def get_source_files(project, paths, extension, parse_file_type):
|
||||
def get_source_files(project, paths, extension, parse_file_type, saved_files):
|
||||
# file path list
|
||||
fp_list = list(FilesystemSearcher(
|
||||
project, paths, extension
|
||||
@@ -80,17 +94,17 @@ def get_source_files(project, paths, extension, parse_file_type):
|
||||
if parse_file_type == ParseFileType.Seed:
|
||||
fb_list.append(load_seed_source_file(fp, project.project_name))
|
||||
else:
|
||||
file = load_source_file(fp, parse_file_type, project.project_name)
|
||||
file = load_source_file(fp, parse_file_type, project.project_name, saved_files)
|
||||
# only append the list if it has contents. added to fix #3568
|
||||
if file:
|
||||
fb_list.append(file)
|
||||
return fb_list
|
||||
|
||||
|
||||
def read_files_for_parser(project, files, dirs, extension, parse_ft):
|
||||
def read_files_for_parser(project, files, dirs, extension, parse_ft, saved_files):
|
||||
parser_files = []
|
||||
source_files = get_source_files(
|
||||
project, dirs, extension, parse_ft
|
||||
project, dirs, extension, parse_ft, saved_files
|
||||
)
|
||||
for sf in source_files:
|
||||
files[sf.file_id] = sf
|
||||
@@ -102,46 +116,46 @@ def read_files_for_parser(project, files, dirs, extension, parse_ft):
|
||||
# dictionary needs to be passed in. What determines the order of
|
||||
# the various projects? Is the root project always last? Do the
|
||||
# non-root projects need to be done separately in order?
|
||||
def read_files(project, files, parser_files):
|
||||
def read_files(project, files, parser_files, saved_files):
|
||||
|
||||
project_files = {}
|
||||
|
||||
project_files['MacroParser'] = read_files_for_parser(
|
||||
project, files, project.macro_paths, '.sql', ParseFileType.Macro,
|
||||
project, files, project.macro_paths, '.sql', ParseFileType.Macro, saved_files
|
||||
)
|
||||
|
||||
project_files['ModelParser'] = read_files_for_parser(
|
||||
project, files, project.source_paths, '.sql', ParseFileType.Model,
|
||||
project, files, project.source_paths, '.sql', ParseFileType.Model, saved_files
|
||||
)
|
||||
|
||||
project_files['SnapshotParser'] = read_files_for_parser(
|
||||
project, files, project.snapshot_paths, '.sql', ParseFileType.Snapshot,
|
||||
project, files, project.snapshot_paths, '.sql', ParseFileType.Snapshot, saved_files
|
||||
)
|
||||
|
||||
project_files['AnalysisParser'] = read_files_for_parser(
|
||||
project, files, project.analysis_paths, '.sql', ParseFileType.Analysis,
|
||||
project, files, project.analysis_paths, '.sql', ParseFileType.Analysis, saved_files
|
||||
)
|
||||
|
||||
project_files['DataTestParser'] = read_files_for_parser(
|
||||
project, files, project.test_paths, '.sql', ParseFileType.Test,
|
||||
project, files, project.test_paths, '.sql', ParseFileType.Test, saved_files
|
||||
)
|
||||
|
||||
project_files['SeedParser'] = read_files_for_parser(
|
||||
project, files, project.data_paths, '.csv', ParseFileType.Seed,
|
||||
project, files, project.data_paths, '.csv', ParseFileType.Seed, saved_files
|
||||
)
|
||||
|
||||
project_files['DocumentationParser'] = read_files_for_parser(
|
||||
project, files, project.docs_paths, '.md', ParseFileType.Documentation,
|
||||
project, files, project.docs_paths, '.md', ParseFileType.Documentation, saved_files
|
||||
)
|
||||
|
||||
project_files['SchemaParser'] = read_files_for_parser(
|
||||
project, files, project.all_source_paths, '.yml', ParseFileType.Schema,
|
||||
project, files, project.all_source_paths, '.yml', ParseFileType.Schema, saved_files
|
||||
)
|
||||
|
||||
# Also read .yaml files for schema files. Might be better to change
|
||||
# 'read_files_for_parser' accept an array in the future.
|
||||
yaml_files = read_files_for_parser(
|
||||
project, files, project.all_source_paths, '.yaml', ParseFileType.Schema,
|
||||
project, files, project.all_source_paths, '.yaml', ParseFileType.Schema, saved_files
|
||||
)
|
||||
project_files['SchemaParser'].extend(yaml_files)
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@ class FilesystemSearcher(Iterable[FilePath]):
|
||||
file_match = FilePath(
|
||||
searched_path=result['searched_path'],
|
||||
relative_path=result['relative_path'],
|
||||
modification_time=result['modification_time'],
|
||||
project_root=root,
|
||||
)
|
||||
yield file_match
|
||||
|
||||
@@ -67,15 +67,16 @@ class BootstrapProcess(dbt.flags.MP_CONTEXT.Process):
|
||||
keeps everything in memory.
|
||||
"""
|
||||
# reset flags
|
||||
dbt.flags.set_from_args(self.task.args)
|
||||
user_config = None
|
||||
if self.task.config is not None:
|
||||
user_config = self.task.config.user_config
|
||||
dbt.flags.set_from_args(self.task.args, user_config)
|
||||
dbt.tracking.initialize_from_flags()
|
||||
# reload the active plugin
|
||||
load_plugin(self.task.config.credentials.type)
|
||||
# register it
|
||||
register_adapter(self.task.config)
|
||||
|
||||
# reset tracking, etc
|
||||
self.task.config.config.set_values(self.task.args.profiles_dir)
|
||||
|
||||
def task_exec(self) -> None:
|
||||
"""task_exec runs first inside the child process"""
|
||||
if type(self.task) != RemoteListTask:
|
||||
|
||||
@@ -7,6 +7,7 @@ from typing import Type, Union, Dict, Any, Optional
|
||||
|
||||
from dbt import tracking
|
||||
from dbt import ui
|
||||
from dbt import flags
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.contracts.results import (
|
||||
NodeStatus, RunResult, collect_timing_info, RunStatus
|
||||
@@ -21,7 +22,7 @@ from .printer import print_skip_caused_by_error, print_skip_line
|
||||
|
||||
from dbt.adapters.factory import register_adapter
|
||||
from dbt.config import RuntimeConfig, Project
|
||||
from dbt.config.profile import read_profile, PROFILES_DIR
|
||||
from dbt.config.profile import read_profile
|
||||
import dbt.exceptions
|
||||
|
||||
|
||||
@@ -34,7 +35,7 @@ class NoneConfig:
|
||||
def read_profiles(profiles_dir=None):
|
||||
"""This is only used for some error handling"""
|
||||
if profiles_dir is None:
|
||||
profiles_dir = PROFILES_DIR
|
||||
profiles_dir = flags.PROFILES_DIR
|
||||
|
||||
raw_profiles = read_profile(profiles_dir)
|
||||
|
||||
@@ -69,6 +70,13 @@ class BaseTask(metaclass=ABCMeta):
|
||||
else:
|
||||
log_manager.format_text()
|
||||
|
||||
@classmethod
|
||||
def set_log_format(cls):
|
||||
if flags.LOG_FORMAT == 'json':
|
||||
log_manager.format_json()
|
||||
else:
|
||||
log_manager.format_text()
|
||||
|
||||
@classmethod
|
||||
def from_args(cls, args):
|
||||
try:
|
||||
@@ -85,7 +93,7 @@ class BaseTask(metaclass=ABCMeta):
|
||||
logger.error("Encountered an error while reading profiles:")
|
||||
logger.error(" ERROR {}".format(str(exc)))
|
||||
|
||||
all_profiles = read_profiles(args.profiles_dir).keys()
|
||||
all_profiles = read_profiles(flags.PROFILES_DIR).keys()
|
||||
|
||||
if len(all_profiles) > 0:
|
||||
logger.info("Defined profiles:")
|
||||
|
||||
@@ -3,19 +3,22 @@ from .snapshot import SnapshotRunner as snapshot_model_runner
|
||||
from .seed import SeedRunner as seed_runner
|
||||
from .test import TestRunner as test_runner
|
||||
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.contracts.results import NodeStatus
|
||||
from dbt.exceptions import InternalException
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.task.test import TestSelector
|
||||
|
||||
|
||||
class BuildTask(RunTask):
|
||||
"""The Build task processes all assets of a given process and attempts to 'build'
|
||||
them in an opinionated fashion. Every resource type outlined in RUNNER_MAP
|
||||
will be processed by the mapped runner class.
|
||||
"""The Build task processes all assets of a given process and attempts to
|
||||
'build' them in an opinionated fashion. Every resource type outlined in
|
||||
RUNNER_MAP will be processed by the mapped runner class.
|
||||
|
||||
I.E. a resource of type Model is handled by the ModelRunner which is imported
|
||||
as run_model_runner.
|
||||
"""
|
||||
I.E. a resource of type Model is handled by the ModelRunner which is
|
||||
imported as run_model_runner. """
|
||||
|
||||
MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error, NodeStatus.Fail]
|
||||
|
||||
RUNNER_MAP = {
|
||||
NodeType.Model: run_model_runner,
|
||||
@@ -23,6 +26,20 @@ class BuildTask(RunTask):
|
||||
NodeType.Seed: seed_runner,
|
||||
NodeType.Test: test_runner,
|
||||
}
|
||||
ALL_RESOURCE_VALUES = frozenset({x for x in RUNNER_MAP.keys()})
|
||||
|
||||
@property
|
||||
def resource_types(self):
|
||||
if not self.args.resource_types:
|
||||
return list(self.ALL_RESOURCE_VALUES)
|
||||
|
||||
values = set(self.args.resource_types)
|
||||
|
||||
if 'all' in values:
|
||||
values.remove('all')
|
||||
values.update(self.ALL_RESOURCE_VALUES)
|
||||
|
||||
return list(values)
|
||||
|
||||
def get_node_selector(self) -> ResourceTypeSelector:
|
||||
if self.manifest is None or self.graph is None:
|
||||
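The new resource_types property above expands the special value 'all' into every type in RUNNER_MAP. A minimal standalone sketch of that expansion, using plain strings in place of dbt's NodeType enum (the names below are illustrative assumptions, not dbt's actual API):

# Hypothetical, simplified mirror of BuildTask.resource_types.
RUNNER_MAP = {'model': None, 'snapshot': None, 'seed': None, 'test': None}
ALL_RESOURCE_VALUES = frozenset(RUNNER_MAP.keys())

def resolve_resource_types(requested):
    # no --resource-type given: build everything
    if not requested:
        return list(ALL_RESOURCE_VALUES)
    values = set(requested)
    # 'all' is shorthand for every supported type
    if 'all' in values:
        values.remove('all')
        values.update(ALL_RESOURCE_VALUES)
    return list(values)

print(sorted(resolve_resource_types(['all'])))            # every type
print(sorted(resolve_resource_types(['model', 'seed'])))  # only the two requested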
@@ -30,11 +47,19 @@ class BuildTask(RunTask):
|
||||
'manifest and graph must be set to get node selection'
|
||||
)
|
||||
|
||||
resource_types = self.resource_types
|
||||
|
||||
if resource_types == [NodeType.Test]:
|
||||
return TestSelector(
|
||||
graph=self.graph,
|
||||
manifest=self.manifest,
|
||||
previous_state=self.previous_state,
|
||||
)
|
||||
return ResourceTypeSelector(
|
||||
graph=self.graph,
|
||||
manifest=self.manifest,
|
||||
previous_state=self.previous_state,
|
||||
resource_types=[x for x in self.RUNNER_MAP.keys()],
|
||||
resource_types=resource_types,
|
||||
)
|
||||
|
||||
def get_runner_type(self, node):
|
||||
|
||||
@@ -4,7 +4,7 @@ from .base import BaseRunner
|
||||
|
||||
from dbt.contracts.results import RunStatus, RunResult
|
||||
from dbt.exceptions import InternalException
|
||||
from dbt.graph import ResourceTypeSelector, SelectionSpec, parse_difference
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.logger import print_timestamped_line
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
@@ -37,13 +37,6 @@ class CompileTask(GraphRunnableTask):
|
||||
def raise_on_first_error(self):
|
||||
return True
|
||||
|
||||
def get_selection_spec(self) -> SelectionSpec:
|
||||
if self.args.selector_name:
|
||||
spec = self.config.get_selector(self.args.selector_name)
|
||||
else:
|
||||
spec = parse_difference(self.args.select, self.args.exclude)
|
||||
return spec
|
||||
|
||||
def get_node_selector(self) -> ResourceTypeSelector:
|
||||
if self.manifest is None or self.graph is None:
|
||||
raise InternalException(
|
||||
|
||||
@@ -5,10 +5,11 @@ import sys
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt import flags
|
||||
import dbt.clients.system
|
||||
import dbt.exceptions
|
||||
from dbt.adapters.factory import get_adapter, register_adapter
|
||||
from dbt.config import Project, Profile, PROFILES_DIR
|
||||
from dbt.config import Project, Profile
|
||||
from dbt.config.renderer import DbtProjectYamlRenderer, ProfileRenderer
|
||||
from dbt.config.utils import parse_cli_vars
|
||||
from dbt.context.base import generate_base_context
|
||||
@@ -69,7 +70,7 @@ class QueryCommentedProfile(Profile):
|
||||
class DebugTask(BaseTask):
|
||||
def __init__(self, args, config):
|
||||
super().__init__(args, config)
|
||||
self.profiles_dir = getattr(self.args, 'profiles_dir', PROFILES_DIR)
|
||||
self.profiles_dir = flags.PROFILES_DIR
|
||||
self.profile_path = os.path.join(self.profiles_dir, 'profiles.yml')
|
||||
try:
|
||||
self.project_dir = get_nearest_project_dir(self.args)
|
||||
@@ -156,7 +157,7 @@ class DebugTask(BaseTask):
|
||||
self.project = Project.from_project_root(
|
||||
self.project_dir,
|
||||
renderer,
|
||||
verify_version=getattr(self.args, 'version_check', False),
|
||||
verify_version=flags.VERSION_CHECK,
|
||||
)
|
||||
except dbt.exceptions.DbtConfigError as exc:
|
||||
self.project_fail_details = str(exc)
|
||||
@@ -195,7 +196,7 @@ class DebugTask(BaseTask):
|
||||
try:
|
||||
partial = Project.partial_load(
|
||||
os.path.dirname(self.project_path),
|
||||
verify_version=getattr(self.args, 'version_check', False),
|
||||
verify_version=bool(flags.VERSION_CHECK),
|
||||
)
|
||||
renderer = DbtProjectYamlRenderer(
|
||||
generate_base_context(self.cli_vars)
|
||||
|
||||
@@ -19,7 +19,7 @@ from dbt.exceptions import RuntimeException, InternalException
|
||||
from dbt.logger import print_timestamped_line
|
||||
from dbt.node_types import NodeType
|
||||
|
||||
from dbt.graph import ResourceTypeSelector, SelectionSpec, parse_difference
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.contracts.graph.parsed import ParsedSourceDefinition
|
||||
|
||||
|
||||
@@ -136,19 +136,6 @@ class FreshnessTask(GraphRunnableTask):
|
||||
def raise_on_first_error(self):
|
||||
return False
|
||||
|
||||
def get_selection_spec(self) -> SelectionSpec:
|
||||
"""Generates a selection spec from task arguments to use when
|
||||
processing graph. A SelectionSpec describes what nodes to select
|
||||
when creating queue from graph of nodes.
|
||||
"""
|
||||
if self.args.selector_name:
|
||||
# use pre-defined selector (--selector) to create selection spec
|
||||
spec = self.config.get_selector(self.args.selector_name)
|
||||
else:
|
||||
# use --select and --exclude args to create selection spec
|
||||
spec = parse_difference(self.args.select, self.args.exclude)
|
||||
return spec
|
||||
|
||||
def get_node_selector(self):
|
||||
if self.manifest is None or self.graph is None:
|
||||
raise InternalException(
|
||||
|
||||
@@ -3,6 +3,7 @@ import shutil
|
||||
|
||||
import dbt.config
|
||||
import dbt.clients.system
|
||||
from dbt import flags
|
||||
from dbt.version import _get_adapter_plugin_names
|
||||
from dbt.adapters.factory import load_plugin, get_include_paths
|
||||
|
||||
@@ -93,7 +94,7 @@ class InitTask(BaseTask):
|
||||
except StopIteration:
|
||||
logger.debug("No adapters installed, skipping")
|
||||
|
||||
profiles_dir = dbt.config.PROFILES_DIR
|
||||
profiles_dir = flags.PROFILES_DIR
|
||||
profiles_file = os.path.join(profiles_dir, 'profiles.yml')
|
||||
|
||||
self.create_profiles_dir(profiles_dir)
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
import json
|
||||
from typing import Type
|
||||
|
||||
from dbt.contracts.graph.parsed import (
|
||||
ParsedExposure,
|
||||
ParsedSourceDefinition
|
||||
)
|
||||
from dbt.graph import (
|
||||
parse_difference,
|
||||
ResourceTypeSelector,
|
||||
SelectionSpec,
|
||||
)
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.task.runnable import GraphRunnableTask, ManifestTask
|
||||
from dbt.task.test import TestSelector
|
||||
from dbt.node_types import NodeType
|
||||
@@ -165,25 +160,19 @@ class ListTask(GraphRunnableTask):
|
||||
return list(values)
|
||||
|
||||
@property
|
||||
def selector(self):
|
||||
def selection_arg(self):
|
||||
# for backwards compatibility, list accepts both --models and --select,
|
||||
# with slightly different behavior: --models implies --resource-type model
|
||||
if self.args.models:
|
||||
return self.args.models
|
||||
else:
|
||||
return self.args.select
|
||||
|
||||
def get_selection_spec(self) -> SelectionSpec:
|
||||
if self.args.selector_name:
|
||||
spec = self.config.get_selector(self.args.selector_name)
|
||||
else:
|
||||
spec = parse_difference(self.selector, self.args.exclude)
|
||||
return spec
|
||||
|
||||
def get_node_selector(self):
|
||||
if self.manifest is None or self.graph is None:
|
||||
raise InternalException(
|
||||
'manifest and graph must be set to perform node selection'
|
||||
)
|
||||
cls: Type[ResourceTypeSelector]
|
||||
if self.resource_types == [NodeType.Test]:
|
||||
return TestSelector(
|
||||
graph=self.graph,
|
||||
|
||||
@@ -30,8 +30,8 @@ def print_fancy_output_line(
|
||||
progress=progress,
|
||||
message=msg)
|
||||
|
||||
truncate_width = ui.PRINTER_WIDTH - 3
|
||||
justified = prefix.ljust(ui.PRINTER_WIDTH, ".")
|
||||
truncate_width = ui.printer_width() - 3
|
||||
justified = prefix.ljust(ui.printer_width(), ".")
|
||||
if truncate and len(justified) > truncate_width:
|
||||
justified = justified[:truncate_width] + '...'
|
||||
|
||||
|
||||
@@ -320,13 +320,12 @@ class RemoteListTask(
|
||||
|
||||
|
||||
class RemoteBuildProjectTask(RPCCommandTask[RPCBuildParameters], BuildTask):
|
||||
|
||||
METHOD_NAME = 'build'
|
||||
|
||||
def set_args(self, params: RPCBuildParameters) -> None:
|
||||
if params.models:
|
||||
self.args.select = self._listify(params.models)
|
||||
else:
|
||||
self.args.select = self._listify(params.select)
|
||||
self.args.resource_types = self._listify(params.resource_types)
|
||||
self.args.select = self._listify(params.select)
|
||||
self.args.exclude = self._listify(params.exclude)
|
||||
self.args.selector_name = params.selector
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@ from .printer import (
|
||||
get_counts,
|
||||
)
|
||||
|
||||
from dbt import deprecations
|
||||
from dbt import tracking
|
||||
from dbt import utils
|
||||
from dbt.adapters.base import BaseRelation
|
||||
@@ -209,11 +208,12 @@ class ModelRunner(CompileRunner):
|
||||
self, result: Any, model
|
||||
) -> List[BaseRelation]:
|
||||
if isinstance(result, str):
|
||||
deprecations.warn('materialization-return',
|
||||
materialization=model.get_materialization())
|
||||
return [
|
||||
self.adapter.Relation.create_from(self.config, model)
|
||||
]
|
||||
msg = (
|
||||
'The materialization ("{}") did not explicitly return a '
|
||||
'list of relations to add to the cache.'
|
||||
.format(str(model.get_materialization()))
|
||||
)
|
||||
raise CompilationException(msg, node=model)
|
||||
|
||||
if isinstance(result, dict):
|
||||
return _validate_materialization_relations_dict(result, model)
|
||||
|
||||
@@ -41,7 +41,13 @@ from dbt.exceptions import (
|
||||
FailFastException,
|
||||
)
|
||||
|
||||
from dbt.graph import GraphQueue, NodeSelector, SelectionSpec, Graph
|
||||
from dbt.graph import (
|
||||
GraphQueue,
|
||||
NodeSelector,
|
||||
SelectionSpec,
|
||||
parse_difference,
|
||||
Graph
|
||||
)
|
||||
from dbt.parser.manifest import ManifestLoader
|
||||
|
||||
import dbt.exceptions
|
||||
@@ -83,6 +89,9 @@ class ManifestTask(ConfiguredTask):
|
||||
|
||||
|
||||
class GraphRunnableTask(ManifestTask):
|
||||
|
||||
MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error]
|
||||
|
||||
def __init__(self, args, config):
|
||||
super().__init__(args, config)
|
||||
self.job_queue: Optional[GraphQueue] = None
|
||||
@@ -103,11 +112,27 @@ class GraphRunnableTask(ManifestTask):
|
||||
def index_offset(self, value: int) -> int:
|
||||
return value
|
||||
|
||||
@abstractmethod
|
||||
@property
|
||||
def selection_arg(self):
|
||||
return self.args.select
|
||||
|
||||
@property
|
||||
def exclusion_arg(self):
|
||||
return self.args.exclude
|
||||
|
||||
def get_selection_spec(self) -> SelectionSpec:
|
||||
raise NotImplementedException(
|
||||
f'get_selection_spec not implemented for task {type(self)}'
|
||||
)
|
||||
default_selector_name = self.config.get_default_selector_name()
|
||||
if self.args.selector_name:
|
||||
# use pre-defined selector (--selector)
|
||||
spec = self.config.get_selector(self.args.selector_name)
|
||||
elif not (self.selection_arg or self.exclusion_arg) and default_selector_name:
|
||||
# use pre-defined selector (--selector) with default: true
|
||||
logger.info(f"Using default selector {default_selector_name}")
|
||||
spec = self.config.get_selector(default_selector_name)
|
||||
else:
|
||||
# use --select and --exclude args
|
||||
spec = parse_difference(self.selection_arg, self.exclusion_arg)
|
||||
return spec
|
||||
|
||||
@abstractmethod
|
||||
def get_node_selector(self) -> NodeSelector:
|
||||
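The get_selection_spec change above gives node selection a three-step precedence: an explicit --selector, otherwise a selector marked default: true, otherwise --select/--exclude. A hedged sketch of that ordering, with a plain dict standing in for the real RuntimeConfig and SelectionSpec types:

# Stand-ins only; `config` maps selector names to criteria, and the
# '__default__' key marks which selector (if any) is the default.
def choose_selection(selector_name, select, exclude, config):
    if selector_name:
        # explicit --selector wins
        return ('selector', config[selector_name])
    default_name = config.get('__default__')
    if not (select or exclude) and default_name:
        # fall back to the selector flagged `default: true`
        return ('selector', config[default_name])
    # otherwise use --select / --exclude directly
    return ('cli', (select, exclude))

config = {'nightly': 'tag:nightly', '__default__': 'nightly'}
print(choose_selection(None, None, None, config))          # ('selector', 'tag:nightly')
print(choose_selection(None, ['my_model'], None, config))  # ('cli', (['my_model'], None))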
@@ -189,7 +214,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
logger.debug('Finished running node {}'.format(
|
||||
runner.node.unique_id))
|
||||
|
||||
fail_fast = getattr(self.config.args, 'fail_fast', False)
|
||||
fail_fast = flags.FAIL_FAST
|
||||
|
||||
if result.status in (NodeStatus.Error, NodeStatus.Fail) and fail_fast:
|
||||
self._raise_next_tick = FailFastException(
|
||||
@@ -256,7 +281,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
self._submit(pool, args, callback)
|
||||
|
||||
# block on completion
|
||||
if getattr(self.config.args, 'fail_fast', False):
|
||||
if flags.FAIL_FAST:
|
||||
# check for errors after task completion in case of
|
||||
# fast failure
|
||||
while self.job_queue.wait_until_something_was_done():
|
||||
@@ -289,7 +314,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
else:
|
||||
self.manifest.update_node(node)
|
||||
|
||||
if result.status == NodeStatus.Error:
|
||||
if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
|
||||
if is_ephemeral:
|
||||
cause = result
|
||||
else:
|
||||
@@ -413,7 +438,7 @@ class GraphRunnableTask(ManifestTask):
|
||||
)
|
||||
|
||||
if len(self._flattened_nodes) == 0:
|
||||
logger.warning("WARNING: Nothing to do. Try checking your model "
|
||||
logger.warning("\nWARNING: Nothing to do. Try checking your model "
|
||||
"configs and model specification args")
|
||||
result = self.get_result(
|
||||
results=[],
|
||||
@@ -546,7 +571,11 @@ class GraphRunnableTask(ManifestTask):
|
||||
)
|
||||
|
||||
def args_to_dict(self):
|
||||
var_args = vars(self.args)
|
||||
var_args = vars(self.args).copy()
|
||||
# update the args with the flags, which could also come from environment
|
||||
# variables or user_config
|
||||
flag_dict = flags.get_flag_dict()
|
||||
var_args.update(flag_dict)
|
||||
dict_args = {}
|
||||
# remove args keys that clutter up the dictionary
|
||||
for key in var_args:
|
||||
@@ -554,10 +583,11 @@ class GraphRunnableTask(ManifestTask):
|
||||
continue
|
||||
if var_args[key] is None:
|
||||
continue
|
||||
# TODO: add more default_false_keys
|
||||
default_false_keys = (
|
||||
'debug', 'full_refresh', 'fail_fast', 'warn_error',
|
||||
'single_threaded', 'test_new_parser', 'log_cache_events',
|
||||
'strict'
|
||||
'single_threaded', 'log_cache_events',
|
||||
'use_experimental_parser',
|
||||
)
|
||||
if key in default_false_keys and var_args[key] is False:
|
||||
continue
|
||||
|
||||
@@ -5,6 +5,7 @@ from dbt.clients.yaml_helper import ( # noqa:F401
|
||||
)
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt import version as dbt_version
|
||||
from dbt import flags
|
||||
from snowplow_tracker import Subject, Tracker, Emitter, logger as sp_logger
|
||||
from snowplow_tracker import SelfDescribingJson
|
||||
from datetime import datetime
|
||||
@@ -184,7 +185,6 @@ def get_invocation_context(user, config, args):
|
||||
"command": args.which,
|
||||
"options": None,
|
||||
"version": str(dbt_version.installed),
|
||||
|
||||
"run_type": get_run_type(args),
|
||||
"adapter_type": adapter_type,
|
||||
"adapter_unique_id": adapter_unique_id,
|
||||
@@ -509,3 +509,11 @@ class InvocationProcessor(logbook.Processor):
|
||||
"run_started_at": active_user.run_started_at.isoformat(),
|
||||
"invocation_id": active_user.invocation_id,
|
||||
})
|
||||
|
||||
|
||||
def initialize_from_flags():
|
||||
# Setting these used to be in UserConfig, but had to be moved here
|
||||
if flags.SEND_ANONYMOUS_USAGE_STATS:
|
||||
initialize_tracking(flags.PROFILES_DIR)
|
||||
else:
|
||||
do_not_track()
|
||||
|
||||
@@ -17,17 +17,6 @@ COLOR_FG_GREEN = COLORS['green']
|
||||
COLOR_FG_YELLOW = COLORS['yellow']
|
||||
COLOR_RESET_ALL = COLORS['reset_all']
|
||||
|
||||
PRINTER_WIDTH = 80
|
||||
|
||||
|
||||
def use_colors(use_colors_val=True):
|
||||
flags.USE_COLORS = use_colors_val
|
||||
|
||||
|
||||
def printer_width(printer_width):
|
||||
global PRINTER_WIDTH
|
||||
PRINTER_WIDTH = printer_width
|
||||
|
||||
|
||||
def color(text: str, color_code: str):
|
||||
if flags.USE_COLORS:
|
||||
@@ -36,6 +25,12 @@ def color(text: str, color_code: str):
|
||||
return text
|
||||
|
||||
|
||||
def printer_width():
|
||||
if flags.PRINTER_WIDTH:
|
||||
return flags.PRINTER_WIDTH
|
||||
return 80
|
||||
|
||||
|
||||
def green(text: str):
|
||||
return color(text, COLOR_FG_GREEN)
|
||||
|
||||
@@ -56,7 +51,7 @@ def line_wrap_message(
|
||||
newlines to newlines and avoid calling textwrap.fill() on them (like
|
||||
markdown)
|
||||
'''
|
||||
width = PRINTER_WIDTH - subtract
|
||||
width = printer_width() - subtract
|
||||
if dedent:
|
||||
msg = textwrap.dedent(msg)
|
||||
|
||||
|
||||
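The ui.py hunks above replace the fixed PRINTER_WIDTH constant with a printer_width() helper that reads a flag and falls back to 80. A small self-contained sketch of that behavior (PRINTER_WIDTH_FLAG stands in for flags.PRINTER_WIDTH):

PRINTER_WIDTH_FLAG = None  # e.g. populated from --printer-width or an env var

def printer_width():
    # fall back to the historical default of 80 columns
    return PRINTER_WIDTH_FLAG or 80

def justify(prefix, truncate=True):
    # mirrors how print_fancy_output_line pads and truncates a status line
    truncate_width = printer_width() - 3
    justified = prefix.ljust(printer_width(), ".")
    if truncate and len(justified) > truncate_width:
        justified = justified[:truncate_width] + '...'
    return justified

print(justify("1 of 5 START view model analytics.my_model "))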
@@ -96,5 +96,5 @@ def _get_dbt_plugins_info():
|
||||
yield plugin_name, mod.version
|
||||
|
||||
|
||||
__version__ = '0.21.0b1'
|
||||
__version__ = '0.21.0rc1'
|
||||
installed = get_installed_version()
|
||||
|
||||
@@ -284,12 +284,12 @@ def parse_args(argv=None):
|
||||
parser.add_argument('adapter')
|
||||
parser.add_argument('--title-case', '-t', default=None)
|
||||
parser.add_argument('--dependency', action='append')
|
||||
parser.add_argument('--dbt-core-version', default='0.21.0b1')
|
||||
parser.add_argument('--dbt-core-version', default='0.21.0rc1')
|
||||
parser.add_argument('--email')
|
||||
parser.add_argument('--author')
|
||||
parser.add_argument('--url')
|
||||
parser.add_argument('--sql', action='store_true')
|
||||
parser.add_argument('--package-version', default='0.21.0b1')
|
||||
parser.add_argument('--package-version', default='0.21.0rc1')
|
||||
parser.add_argument('--project-version', default='1.0')
|
||||
parser.add_argument(
|
||||
'--no-dependency', action='store_false', dest='set_dependency'
|
||||
|
||||
@@ -24,7 +24,7 @@ def read(fname):
|
||||
|
||||
|
||||
package_name = "dbt-core"
|
||||
package_version = "0.21.0b1"
|
||||
package_version = "0.21.0rc1"
|
||||
description = """dbt (data build tool) is a command line tool that helps \
|
||||
analysts and engineers transform data in their warehouse more effectively"""
|
||||
|
||||
|
||||
75 docker/requirements/requirements.0.21.0b2.txt Normal file
@@ -0,0 +1,75 @@
|
||||
agate==1.6.1
|
||||
asn1crypto==1.4.0
|
||||
attrs==21.2.0
|
||||
azure-common==1.1.27
|
||||
azure-core==1.17.0
|
||||
azure-storage-blob==12.8.1
|
||||
Babel==2.9.1
|
||||
boto3==1.18.25
|
||||
botocore==1.21.25
|
||||
cachetools==4.2.2
|
||||
certifi==2021.5.30
|
||||
cffi==1.14.6
|
||||
chardet==4.0.0
|
||||
charset-normalizer==2.0.4
|
||||
colorama==0.4.4
|
||||
cryptography==3.4.7
|
||||
google-api-core==1.31.2
|
||||
google-auth==1.35.0
|
||||
google-cloud-bigquery==2.24.1
|
||||
google-cloud-core==1.7.2
|
||||
google-crc32c==1.1.2
|
||||
google-resumable-media==2.0.0
|
||||
googleapis-common-protos==1.53.0
|
||||
grpcio==1.39.0
|
||||
hologram==0.0.14
|
||||
idna==3.2
|
||||
importlib-metadata==4.6.4
|
||||
isodate==0.6.0
|
||||
jeepney==0.7.1
|
||||
Jinja2==2.11.3
|
||||
jmespath==0.10.0
|
||||
json-rpc==1.13.0
|
||||
jsonschema==3.1.1
|
||||
keyring==21.8.0
|
||||
leather==0.3.3
|
||||
Logbook==1.5.3
|
||||
MarkupSafe==2.0.1
|
||||
mashumaro==2.5
|
||||
minimal-snowplow-tracker==0.0.2
|
||||
msgpack==1.0.2
|
||||
msrest==0.6.21
|
||||
networkx==2.6.2
|
||||
oauthlib==3.1.1
|
||||
oscrypto==1.2.1
|
||||
packaging==20.9
|
||||
parsedatetime==2.6
|
||||
proto-plus==1.19.0
|
||||
protobuf==3.17.3
|
||||
psycopg2-binary==2.9.1
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pycparser==2.20
|
||||
pycryptodomex==3.10.1
|
||||
PyJWT==2.1.0
|
||||
pyOpenSSL==20.0.1
|
||||
pyparsing==2.4.7
|
||||
pyrsistent==0.18.0
|
||||
python-dateutil==2.8.2
|
||||
python-slugify==5.0.2
|
||||
pytimeparse==1.1.8
|
||||
pytz==2021.1
|
||||
PyYAML==5.4.1
|
||||
requests==2.26.0
|
||||
requests-oauthlib==1.3.0
|
||||
rsa==4.7.2
|
||||
s3transfer==0.5.0
|
||||
SecretStorage==3.3.1
|
||||
six==1.16.0
|
||||
snowflake-connector-python==2.5.1
|
||||
sqlparse==0.3.1
|
||||
text-unidecode==1.3
|
||||
typing-extensions==3.10.0.0
|
||||
urllib3==1.26.6
|
||||
Werkzeug==2.0.1
|
||||
zipp==3.5.0
|
||||
75 docker/requirements/requirements.0.21.0rc1.txt Normal file
@@ -0,0 +1,75 @@
|
||||
agate==1.6.1
|
||||
asn1crypto==1.4.0
|
||||
attrs==21.2.0
|
||||
azure-common==1.1.27
|
||||
azure-core==1.18.0
|
||||
azure-storage-blob==12.8.1
|
||||
Babel==2.9.1
|
||||
boto3==1.18.44
|
||||
botocore==1.21.44
|
||||
cachetools==4.2.2
|
||||
certifi==2021.5.30
|
||||
cffi==1.14.6
|
||||
chardet==4.0.0
|
||||
charset-normalizer==2.0.6
|
||||
colorama==0.4.4
|
||||
cryptography==3.4.8
|
||||
google-api-core==1.31.2
|
||||
google-auth==1.35.0
|
||||
google-cloud-bigquery==2.26.0
|
||||
google-cloud-core==1.7.2
|
||||
google-crc32c==1.1.2
|
||||
google-resumable-media==2.0.2
|
||||
googleapis-common-protos==1.53.0
|
||||
grpcio==1.40.0
|
||||
hologram==0.0.14
|
||||
idna==3.2
|
||||
importlib-metadata==4.8.1
|
||||
isodate==0.6.0
|
||||
jeepney==0.7.1
|
||||
Jinja2==2.11.3
|
||||
jmespath==0.10.0
|
||||
json-rpc==1.13.0
|
||||
jsonschema==3.1.1
|
||||
keyring==21.8.0
|
||||
leather==0.3.3
|
||||
Logbook==1.5.3
|
||||
MarkupSafe==2.0.1
|
||||
mashumaro==2.5
|
||||
minimal-snowplow-tracker==0.0.2
|
||||
msgpack==1.0.2
|
||||
msrest==0.6.21
|
||||
networkx==2.6.3
|
||||
oauthlib==3.1.1
|
||||
oscrypto==1.2.1
|
||||
packaging==20.9
|
||||
parsedatetime==2.6
|
||||
proto-plus==1.19.0
|
||||
protobuf==3.18.0
|
||||
psycopg2-binary==2.9.1
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pycparser==2.20
|
||||
pycryptodomex==3.10.1
|
||||
PyJWT==2.1.0
|
||||
pyOpenSSL==20.0.1
|
||||
pyparsing==2.4.7
|
||||
pyrsistent==0.18.0
|
||||
python-dateutil==2.8.2
|
||||
python-slugify==5.0.2
|
||||
pytimeparse==1.1.8
|
||||
pytz==2021.1
|
||||
PyYAML==5.4.1
|
||||
requests==2.26.0
|
||||
requests-oauthlib==1.3.0
|
||||
rsa==4.7.2
|
||||
s3transfer==0.5.0
|
||||
SecretStorage==3.3.1
|
||||
six==1.16.0
|
||||
snowflake-connector-python==2.5.1
|
||||
sqlparse==0.4.2
|
||||
text-unidecode==1.3
|
||||
typing-extensions==3.10.0.2
|
||||
urllib3==1.26.6
|
||||
Werkzeug==2.0.1
|
||||
zipp==3.5.0
|
||||
@@ -1,5 +1,3 @@
|
||||
-e ./core
|
||||
-e ./plugins/postgres
|
||||
-e ./plugins/redshift
|
||||
-e ./plugins/snowflake
|
||||
-e ./plugins/bigquery
|
||||
|
||||
57 performance/runner/Cargo.lock generated
@@ -22,12 +22,32 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"serde",
|
||||
"time",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.3"
|
||||
@@ -94,6 +114,25 @@ version = "0.2.98"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
@@ -140,6 +179,7 @@ dependencies = [
|
||||
name = "runner"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"itertools",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -254,6 +294,17 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"wasi",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.8.0"
|
||||
@@ -284,6 +335,12 @@ version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
|
||||
@@ -4,6 +4,7 @@ version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
itertools = "0.10.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::exceptions::{CalculateError, IOError};
|
||||
use chrono::prelude::*;
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs;
|
||||
@@ -45,6 +46,7 @@ pub struct Data {
|
||||
pub struct Calculation {
|
||||
pub metric: String,
|
||||
pub regression: bool,
|
||||
pub ts: DateTime<Utc>,
|
||||
pub data: Data,
|
||||
}
|
||||
|
||||
@@ -60,6 +62,11 @@ pub struct MeasurementGroup {
|
||||
// Given two measurements, return all the calculations. Calculations are
|
||||
// flagged as regressions or not regressions.
|
||||
fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec<Calculation> {
|
||||
// choosing the current timestamp for all calculations to be the same.
|
||||
// this timestamp is not from the time of measurement because hyperfine
|
||||
// controls that. Since calculation is run directly after, this is fine.
|
||||
let ts = Utc::now();
|
||||
|
||||
let median_threshold = 1.05; // 5% regression threshold
|
||||
let median_difference = dev.median / baseline.median;
|
||||
|
||||
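The comments above describe the performance runner's regression rule: a run is flagged when its median exceeds the baseline median by more than 5%. A Python restatement of that arithmetic (the Rust source above is authoritative; this is only a sketch):

MEDIAN_THRESHOLD = 1.05  # 5% regression threshold

def is_median_regression(dev_median, baseline_median):
    difference = dev_median / baseline_median
    return difference > MEDIAN_THRESHOLD

print(is_median_regression(10.6, 10.0))  # True  (6% slower than baseline)
print(is_median_regression(10.4, 10.0))  # False (4% slower than baseline)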
@@ -70,6 +77,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec<Cal
|
||||
Calculation {
|
||||
metric: ["median", metric].join("_"),
|
||||
regression: median_difference > median_threshold,
|
||||
ts: ts,
|
||||
data: Data {
|
||||
threshold: median_threshold,
|
||||
difference: median_difference,
|
||||
@@ -80,6 +88,7 @@ fn calculate(metric: &str, dev: &Measurement, baseline: &Measurement) -> Vec<Cal
|
||||
Calculation {
|
||||
metric: ["stddev", metric].join("_"),
|
||||
regression: stddev_difference > stddev_threshold,
|
||||
ts: ts,
|
||||
data: Data {
|
||||
threshold: stddev_threshold,
|
||||
difference: stddev_difference,
|
||||
|
||||
@@ -6,6 +6,8 @@ mod measure;
|
||||
|
||||
use crate::calculate::Calculation;
|
||||
use crate::exceptions::CalculateError;
|
||||
use chrono::offset::Utc;
|
||||
use std::fs::metadata;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
@@ -29,6 +31,9 @@ enum Opt {
|
||||
#[structopt(parse(from_os_str))]
|
||||
#[structopt(short)]
|
||||
results_dir: PathBuf,
|
||||
#[structopt(parse(from_os_str))]
|
||||
#[structopt(short)]
|
||||
out_dir: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -62,7 +67,18 @@ fn run_app() -> Result<i32, CalculateError> {
|
||||
}
|
||||
|
||||
// calculate subcommand
|
||||
Opt::Calculate { results_dir } => {
|
||||
Opt::Calculate {
|
||||
results_dir,
|
||||
out_dir,
|
||||
} => {
|
||||
// validate output directory and exit early if it won't work.
|
||||
let md = metadata(&out_dir)
|
||||
.expect("Main: Failed to read specified output directory metadata. Does it exist?");
|
||||
if !md.is_dir() {
|
||||
eprintln!("Main: Output directory is not a directory");
|
||||
return Ok(1);
|
||||
}
|
||||
|
||||
// get all the calculations or gracefully show the user an exception
|
||||
let calculations = calculate::regressions(&results_dir)?;
|
||||
|
||||
@@ -77,9 +93,18 @@ fn run_app() -> Result<i32, CalculateError> {
|
||||
let json_calcs = serde_json::to_string_pretty(&calculations)
|
||||
.expect("Main: Failed to serialize calculations to json");
|
||||
|
||||
// if there are any calculations, use the first timestamp; if there are none
|
||||
// just use the current time.
|
||||
let ts = calculations
|
||||
.first()
|
||||
.map_or_else(|| Utc::now(), |calc| calc.ts);
|
||||
|
||||
// create the empty destination file, and write the json string
|
||||
let outfile = &mut results_dir.into_os_string();
|
||||
outfile.push("/final_calculations.json");
|
||||
let outfile = &mut out_dir.into_os_string();
|
||||
outfile.push("/final_calculations_");
|
||||
outfile.push(ts.timestamp().to_string());
|
||||
outfile.push(".json");
|
||||
|
||||
let mut f = File::create(outfile).expect("Main: Unable to create file");
|
||||
f.write_all(json_calcs.as_bytes())
|
||||
.expect("Main: Unable to write data");
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="https://raw.githubusercontent.com/dbt-labs/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
|
||||
</p>
|
||||
|
||||
**[dbt](https://www.getdbt.com/)** (data build tool) enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
|
||||
|
||||
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.
|
||||
|
||||
## dbt-bigquery
|
||||
|
||||
The `dbt-bigquery` package contains all of the code required to make dbt operate on a BigQuery database. For
|
||||
more information on using dbt with BigQuery, consult [the docs](https://docs.getdbt.com/docs/profile-bigquery).
|
||||
|
||||
|
||||
## Find out more
|
||||
|
||||
- Check out the [Introduction to dbt](https://docs.getdbt.com/docs/introduction/).
|
||||
- Read the [dbt Viewpoint](https://docs.getdbt.com/docs/about/viewpoint/).
|
||||
|
||||
## Join thousands of analysts in the dbt community
|
||||
|
||||
- Join the [chat](http://community.getdbt.com/) on Slack.
|
||||
- Find community posts on [dbt Discourse](https://discourse.getdbt.com).
|
||||
|
||||
## Reporting bugs and contributing code
|
||||
|
||||
- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt/issues/new).
|
||||
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](https://github.com/dbt-labs/dbt/blob/HEAD/CONTRIBUTING.md)
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct).
|
||||
@@ -1,13 +0,0 @@
|
||||
from dbt.adapters.bigquery.connections import BigQueryConnectionManager # noqa
|
||||
from dbt.adapters.bigquery.connections import BigQueryCredentials
|
||||
from dbt.adapters.bigquery.relation import BigQueryRelation # noqa
|
||||
from dbt.adapters.bigquery.column import BigQueryColumn # noqa
|
||||
from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget # noqa
|
||||
|
||||
from dbt.adapters.base import AdapterPlugin
|
||||
from dbt.include import bigquery
|
||||
|
||||
Plugin = AdapterPlugin(
|
||||
adapter=BigQueryAdapter,
|
||||
credentials=BigQueryCredentials,
|
||||
include_path=bigquery.PACKAGE_PATH)
|
||||
@@ -1 +0,0 @@
|
||||
version = '0.21.0b1'
|
||||
@@ -1,127 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List, TypeVar, Iterable, Type
|
||||
|
||||
from dbt.adapters.base.column import Column
|
||||
|
||||
from google.cloud.bigquery import SchemaField
|
||||
|
||||
Self = TypeVar('Self', bound='BigQueryColumn')
|
||||
|
||||
|
||||
@dataclass(init=False)
|
||||
class BigQueryColumn(Column):
|
||||
TYPE_LABELS = {
|
||||
'STRING': 'STRING',
|
||||
'TIMESTAMP': 'TIMESTAMP',
|
||||
'FLOAT': 'FLOAT64',
|
||||
'INTEGER': 'INT64',
|
||||
'RECORD': 'RECORD',
|
||||
}
|
||||
fields: List[Self]
|
||||
mode: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
column: str,
|
||||
dtype: str,
|
||||
fields: Optional[Iterable[SchemaField]] = None,
|
||||
mode: str = 'NULLABLE',
|
||||
) -> None:
|
||||
super().__init__(column, dtype)
|
||||
|
||||
if fields is None:
|
||||
fields = []
|
||||
|
||||
self.fields = self.wrap_subfields(fields)
|
||||
self.mode = mode
|
||||
|
||||
@classmethod
|
||||
def wrap_subfields(
|
||||
cls: Type[Self], fields: Iterable[SchemaField]
|
||||
) -> List[Self]:
|
||||
return [cls.create_from_field(field) for field in fields]
|
||||
|
||||
@classmethod
|
||||
def create_from_field(cls: Type[Self], field: SchemaField) -> Self:
|
||||
return cls(
|
||||
field.name,
|
||||
cls.translate_type(field.field_type),
|
||||
field.fields,
|
||||
field.mode,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _flatten_recursive(
|
||||
cls: Type[Self], col: Self, prefix: Optional[str] = None
|
||||
) -> List[Self]:
|
||||
if prefix is None:
|
||||
prefix = []
|
||||
|
||||
if len(col.fields) == 0:
|
||||
prefixed_name = ".".join(prefix + [col.column])
|
||||
new_col = cls(prefixed_name, col.dtype, col.fields, col.mode)
|
||||
return [new_col]
|
||||
|
||||
new_fields = []
|
||||
for field in col.fields:
|
||||
new_prefix = prefix + [col.column]
|
||||
new_fields.extend(cls._flatten_recursive(field, new_prefix))
|
||||
|
||||
return new_fields
|
||||
|
||||
def flatten(self):
|
||||
return self._flatten_recursive(self)
|
||||
|
||||
@property
|
||||
def quoted(self):
|
||||
return '`{}`'.format(self.column)
|
||||
|
||||
def literal(self, value):
|
||||
return "cast({} as {})".format(value, self.dtype)
|
||||
|
||||
@property
|
||||
def data_type(self) -> str:
|
||||
if self.dtype.upper() == 'RECORD':
|
||||
subcols = [
|
||||
"{} {}".format(col.name, col.data_type) for col in self.fields
|
||||
]
|
||||
field_type = 'STRUCT<{}>'.format(", ".join(subcols))
|
||||
|
||||
else:
|
||||
field_type = self.dtype
|
||||
|
||||
if self.mode.upper() == 'REPEATED':
|
||||
return 'ARRAY<{}>'.format(field_type)
|
||||
|
||||
else:
|
||||
return field_type
|
||||
|
||||
def is_string(self) -> bool:
|
||||
return self.dtype.lower() == 'string'
|
||||
|
||||
def is_integer(self) -> bool:
|
||||
return self.dtype.lower() == 'int64'
|
||||
|
||||
def is_numeric(self) -> bool:
|
||||
return self.dtype.lower() == 'numeric'
|
||||
|
||||
def is_float(self):
|
||||
return self.dtype.lower() == 'float64'
|
||||
|
||||
def can_expand_to(self: Self, other_column: Self) -> bool:
|
||||
"""returns True if both columns are strings"""
|
||||
return self.is_string() and other_column.is_string()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "<BigQueryColumn {} ({}, {})>".format(self.name, self.data_type,
|
||||
self.mode)
|
||||
|
||||
def column_to_bq_schema(self) -> SchemaField:
|
||||
"""Convert a column to a bigquery schema object.
|
||||
"""
|
||||
kwargs = {}
|
||||
if len(self.fields) > 0:
|
||||
fields = [field.column_to_bq_schema() for field in self.fields]
|
||||
kwargs = {"fields": fields}
|
||||
|
||||
return SchemaField(self.name, self.dtype, self.mode, **kwargs)
|
||||
@@ -1,634 +0,0 @@
|
||||
import json
|
||||
import re
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
import agate
|
||||
from requests.exceptions import ConnectionError
|
||||
from typing import Optional, Any, Dict, Tuple
|
||||
|
||||
import google.auth
|
||||
import google.auth.exceptions
|
||||
import google.cloud.bigquery
|
||||
import google.cloud.exceptions
|
||||
from google.api_core import retry, client_info
|
||||
from google.auth import impersonated_credentials
|
||||
from google.oauth2 import (
|
||||
credentials as GoogleCredentials,
|
||||
service_account as GoogleServiceAccountCredentials
|
||||
)
|
||||
|
||||
from dbt.utils import format_bytes, format_rows_number
|
||||
from dbt.clients import agate_helper, gcloud
|
||||
from dbt.tracking import active_user
|
||||
from dbt.contracts.connection import ConnectionState, AdapterResponse
|
||||
from dbt.exceptions import (
|
||||
FailedToConnectException, RuntimeException, DatabaseException
|
||||
)
|
||||
from dbt.adapters.base import BaseConnectionManager, Credentials
|
||||
from dbt.logger import GLOBAL_LOGGER as logger
|
||||
from dbt.version import __version__ as dbt_version
|
||||
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
|
||||
|
||||
BQ_QUERY_JOB_SPLIT = '-----Query Job SQL Follows-----'
|
||||
|
||||
WRITE_TRUNCATE = google.cloud.bigquery.job.WriteDisposition.WRITE_TRUNCATE
|
||||
|
||||
REOPENABLE_ERRORS = (
|
||||
ConnectionResetError,
|
||||
ConnectionError,
|
||||
)
|
||||
|
||||
RETRYABLE_ERRORS = (
|
||||
google.cloud.exceptions.ServerError,
|
||||
google.cloud.exceptions.BadRequest,
|
||||
ConnectionResetError,
|
||||
ConnectionError,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache()
|
||||
def get_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]:
|
||||
"""
|
||||
Returns (credentials, project_id)
|
||||
|
||||
project_id is returned if available from the environment; otherwise None
|
||||
"""
|
||||
# Cached, because the underlying implementation shells out, taking ~1s
|
||||
return google.auth.default(scopes=scopes)
|
||||
|
||||
|
||||
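get_bigquery_defaults is wrapped in lru_cache because the underlying google.auth.default() call shells out and takes about a second. A small illustration of that caching pattern, with a sleep standing in for the expensive lookup (not dbt code):

import time
from functools import lru_cache

@lru_cache()
def slow_default_lookup(scopes=None):
    time.sleep(1)  # simulate the ~1s shell-out mentioned in the docstring
    return ('credentials', 'my-project')

start = time.time()
slow_default_lookup()
slow_default_lookup()  # served from the cache, no second sleep
print(f"two calls took {time.time() - start:.1f}s")  # roughly 1.0s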
class Priority(StrEnum):
|
||||
Interactive = 'interactive'
|
||||
Batch = 'batch'
|
||||
|
||||
|
||||
class BigQueryConnectionMethod(StrEnum):
|
||||
OAUTH = 'oauth'
|
||||
SERVICE_ACCOUNT = 'service-account'
|
||||
SERVICE_ACCOUNT_JSON = 'service-account-json'
|
||||
OAUTH_SECRETS = 'oauth-secrets'
|
||||
|
||||
|
||||
@dataclass
|
||||
class BigQueryAdapterResponse(AdapterResponse):
|
||||
bytes_processed: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class BigQueryCredentials(Credentials):
|
||||
method: BigQueryConnectionMethod
|
||||
# BigQuery allows an empty database / project, where it defers to the
|
||||
# environment for the project
|
||||
database: Optional[str]
|
||||
timeout_seconds: Optional[int] = 300
|
||||
location: Optional[str] = None
|
||||
priority: Optional[Priority] = None
|
||||
retries: Optional[int] = 1
|
||||
maximum_bytes_billed: Optional[int] = None
|
||||
impersonate_service_account: Optional[str] = None
|
||||
|
||||
# Keyfile json creds
|
||||
keyfile: Optional[str] = None
|
||||
keyfile_json: Optional[Dict[str, Any]] = None
|
||||
|
||||
# oauth-secrets
|
||||
token: Optional[str] = None
|
||||
refresh_token: Optional[str] = None
|
||||
client_id: Optional[str] = None
|
||||
client_secret: Optional[str] = None
|
||||
token_uri: Optional[str] = None
|
||||
|
||||
_ALIASES = {
|
||||
'project': 'database',
|
||||
'dataset': 'schema',
|
||||
'target_project': 'target_database',
|
||||
'target_dataset': 'target_schema',
|
||||
}
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return 'bigquery'
|
||||
|
||||
@property
|
||||
def unique_field(self):
|
||||
return self.database
|
||||
|
||||
def _connection_keys(self):
|
||||
return ('method', 'database', 'schema', 'location', 'priority',
|
||||
'timeout_seconds', 'maximum_bytes_billed')
|
||||
|
||||
@classmethod
|
||||
def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]:
|
||||
# We need to inject the correct value of the database (aka project) at
|
||||
# this stage, ref
|
||||
# https://github.com/dbt-labs/dbt/pull/2908#discussion_r532927436.
|
||||
|
||||
# `database` is an alias of `project` in BigQuery
|
||||
if 'database' not in d:
|
||||
_, database = get_bigquery_defaults()
|
||||
d['database'] = database
|
||||
return d
|
||||
|
||||
|
||||
class BigQueryConnectionManager(BaseConnectionManager):
|
||||
TYPE = 'bigquery'
|
||||
|
||||
SCOPE = ('https://www.googleapis.com/auth/bigquery',
|
||||
'https://www.googleapis.com/auth/cloud-platform',
|
||||
'https://www.googleapis.com/auth/drive')
|
||||
|
||||
QUERY_TIMEOUT = 300
|
||||
RETRIES = 1
|
||||
DEFAULT_INITIAL_DELAY = 1.0 # Seconds
|
||||
DEFAULT_MAXIMUM_DELAY = 1.0 # Seconds
|
||||
|
||||
@classmethod
|
||||
def handle_error(cls, error, message):
|
||||
error_msg = "\n".join([item['message'] for item in error.errors])
|
||||
raise DatabaseException(error_msg)
|
||||
|
||||
def clear_transaction(self):
|
||||
pass
|
||||
|
||||
@contextmanager
|
||||
def exception_handler(self, sql):
|
||||
try:
|
||||
yield
|
||||
|
||||
except google.cloud.exceptions.BadRequest as e:
|
||||
message = "Bad request while running query"
|
||||
self.handle_error(e, message)
|
||||
|
||||
except google.cloud.exceptions.Forbidden as e:
|
||||
message = "Access denied while running query"
|
||||
self.handle_error(e, message)
|
||||
|
||||
except google.auth.exceptions.RefreshError as e:
|
||||
message = "Unable to generate access token, if you're using " \
|
||||
"impersonate_service_account, make sure your " \
|
||||
'initial account has the "roles/' \
|
||||
'iam.serviceAccountTokenCreator" role on the ' \
|
||||
'account you are trying to impersonate.\n\n' \
|
||||
f'{str(e)}'
|
||||
raise RuntimeException(message)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Unhandled error while running:\n{}".format(sql))
|
||||
logger.debug(e)
|
||||
if isinstance(e, RuntimeException):
|
||||
# during a sql query, an internal to dbt exception was raised.
|
||||
# this sounds a lot like a signal handler and probably has
|
||||
# useful information, so raise it without modification.
|
||||
raise
|
||||
exc_message = str(e)
|
||||
# the google bigquery library likes to add the query log, which we
|
||||
# don't want to log. Hopefully they never change this!
|
||||
if BQ_QUERY_JOB_SPLIT in exc_message:
|
||||
exc_message = exc_message.split(BQ_QUERY_JOB_SPLIT)[0].strip()
|
||||
raise RuntimeException(exc_message)
|
||||
|
||||
def cancel_open(self) -> None:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def close(cls, connection):
|
||||
connection.state = ConnectionState.CLOSED
|
||||
|
||||
return connection
|
||||
|
||||
def begin(self):
|
||||
pass
|
||||
|
||||
def commit(self):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def get_bigquery_credentials(cls, profile_credentials):
|
||||
method = profile_credentials.method
|
||||
creds = GoogleServiceAccountCredentials.Credentials
|
||||
|
||||
if method == BigQueryConnectionMethod.OAUTH:
|
||||
credentials, _ = get_bigquery_defaults(scopes=cls.SCOPE)
|
||||
return credentials
|
||||
|
||||
elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT:
|
||||
keyfile = profile_credentials.keyfile
|
||||
return creds.from_service_account_file(keyfile, scopes=cls.SCOPE)
|
||||
|
||||
elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT_JSON:
|
||||
details = profile_credentials.keyfile_json
|
||||
return creds.from_service_account_info(details, scopes=cls.SCOPE)
|
||||
|
||||
elif method == BigQueryConnectionMethod.OAUTH_SECRETS:
|
||||
return GoogleCredentials.Credentials(
|
||||
token=profile_credentials.token,
|
||||
refresh_token=profile_credentials.refresh_token,
|
||||
client_id=profile_credentials.client_id,
|
||||
client_secret=profile_credentials.client_secret,
|
||||
token_uri=profile_credentials.token_uri,
|
||||
scopes=cls.SCOPE
|
||||
)
|
||||
|
||||
error = ('Invalid `method` in profile: "{}"'.format(method))
|
||||
raise FailedToConnectException(error)
|
||||
|
||||
@classmethod
|
||||
def get_impersonated_bigquery_credentials(cls, profile_credentials):
|
||||
source_credentials = cls.get_bigquery_credentials(profile_credentials)
|
||||
return impersonated_credentials.Credentials(
|
||||
source_credentials=source_credentials,
|
||||
target_principal=profile_credentials.impersonate_service_account,
|
||||
target_scopes=list(cls.SCOPE),
|
||||
lifetime=profile_credentials.timeout_seconds,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_bigquery_client(cls, profile_credentials):
|
||||
if profile_credentials.impersonate_service_account:
|
||||
creds =\
|
||||
cls.get_impersonated_bigquery_credentials(profile_credentials)
|
||||
else:
|
||||
creds = cls.get_bigquery_credentials(profile_credentials)
|
||||
database = profile_credentials.database
|
||||
location = getattr(profile_credentials, 'location', None)
|
||||
|
||||
info = client_info.ClientInfo(user_agent=f'dbt-{dbt_version}')
|
||||
return google.cloud.bigquery.Client(
|
||||
database,
|
||||
creds,
|
||||
location=location,
|
||||
client_info=info,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def open(cls, connection):
|
||||
if connection.state == 'open':
|
||||
logger.debug('Connection is already open, skipping open.')
|
||||
return connection
|
||||
|
||||
try:
|
||||
handle = cls.get_bigquery_client(connection.credentials)
|
||||
|
||||
except google.auth.exceptions.DefaultCredentialsError:
|
||||
logger.info("Please log into GCP to continue")
|
||||
gcloud.setup_default_credentials()
|
||||
|
||||
handle = cls.get_bigquery_client(connection.credentials)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Got an error when attempting to create a bigquery "
|
||||
"client: '{}'".format(e))
|
||||
|
||||
connection.handle = None
|
||||
connection.state = 'fail'
|
||||
|
||||
raise FailedToConnectException(str(e))
|
||||
|
||||
connection.handle = handle
|
||||
connection.state = 'open'
|
||||
return connection
|
||||
|
||||
@classmethod
|
||||
def get_timeout(cls, conn):
|
||||
credentials = conn.credentials
|
||||
return credentials.timeout_seconds
|
||||
|
||||
@classmethod
|
||||
def get_retries(cls, conn) -> int:
|
||||
credentials = conn.credentials
|
||||
if credentials.retries is not None:
|
||||
return credentials.retries
|
||||
else:
|
||||
return 1
|
||||
|
||||
@classmethod
|
||||
def get_table_from_response(cls, resp):
|
||||
column_names = [field.name for field in resp.schema]
|
||||
return agate_helper.table_from_data_flat(resp, column_names)
|
||||
|
||||
def raw_execute(self, sql, fetch=False, *, use_legacy_sql=False):
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
logger.debug('On {}: {}', conn.name, sql)
|
||||
|
||||
if self.profile.query_comment and self.profile.query_comment.job_label:
|
||||
query_comment = self.query_header.comment.query_comment
|
||||
labels = self._labels_from_query_comment(query_comment)
|
||||
else:
|
||||
labels = {}
|
||||
|
||||
if active_user:
|
||||
labels['dbt_invocation_id'] = active_user.invocation_id
|
||||
|
||||
job_params = {'use_legacy_sql': use_legacy_sql, 'labels': labels}
|
||||
|
||||
priority = conn.credentials.priority
|
||||
if priority == Priority.Batch:
|
||||
job_params['priority'] = google.cloud.bigquery.QueryPriority.BATCH
|
||||
else:
|
||||
job_params[
|
||||
'priority'] = google.cloud.bigquery.QueryPriority.INTERACTIVE
|
||||
|
||||
maximum_bytes_billed = conn.credentials.maximum_bytes_billed
|
||||
if maximum_bytes_billed is not None and maximum_bytes_billed != 0:
|
||||
job_params['maximum_bytes_billed'] = maximum_bytes_billed
|
||||
|
||||
def fn():
|
||||
return self._query_and_results(client, sql, conn, job_params)
|
||||
|
||||
query_job, iterator = self._retry_and_handle(msg=sql, conn=conn, fn=fn)
|
||||
|
||||
return query_job, iterator
|
||||
|
||||
def execute(
|
||||
self, sql, auto_begin=False, fetch=None
|
||||
) -> Tuple[BigQueryAdapterResponse, agate.Table]:
|
||||
sql = self._add_query_comment(sql)
|
||||
# auto_begin is ignored on bigquery, and only included for consistency
|
||||
query_job, iterator = self.raw_execute(sql, fetch=fetch)
|
||||
|
||||
if fetch:
|
||||
table = self.get_table_from_response(iterator)
|
||||
else:
|
||||
table = agate_helper.empty_table()
|
||||
|
||||
message = 'OK'
|
||||
code = None
|
||||
num_rows = None
|
||||
bytes_processed = None
|
||||
|
||||
if query_job.statement_type == 'CREATE_VIEW':
|
||||
code = 'CREATE VIEW'
|
||||
|
||||
elif query_job.statement_type == 'CREATE_TABLE_AS_SELECT':
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
query_table = client.get_table(query_job.destination)
|
||||
code = 'CREATE TABLE'
|
||||
num_rows = query_table.num_rows
|
||||
bytes_processed = query_job.total_bytes_processed
|
||||
message = '{} ({} rows, {} processed)'.format(
|
||||
code,
|
||||
format_rows_number(num_rows),
|
||||
format_bytes(bytes_processed)
|
||||
)
|
||||
|
||||
elif query_job.statement_type == 'SCRIPT':
|
||||
code = 'SCRIPT'
|
||||
bytes_processed = query_job.total_bytes_processed
|
||||
message = f'{code} ({format_bytes(bytes_processed)} processed)'
|
||||
|
||||
elif query_job.statement_type in ['INSERT', 'DELETE', 'MERGE']:
|
||||
code = query_job.statement_type
|
||||
num_rows = query_job.num_dml_affected_rows
|
||||
bytes_processed = query_job.total_bytes_processed
|
||||
message = '{} ({} rows, {} processed)'.format(
|
||||
code,
|
||||
format_rows_number(num_rows),
|
||||
format_bytes(bytes_processed),
|
||||
)
|
||||
|
||||
response = BigQueryAdapterResponse(
|
||||
_message=message,
|
||||
rows_affected=num_rows,
|
||||
code=code,
|
||||
bytes_processed=bytes_processed
|
||||
)
|
||||
|
||||
return response, table
|
||||
|
||||
def get_partitions_metadata(self, table):
|
||||
def standard_to_legacy(table):
|
||||
return table.project + ':' + table.dataset + '.' + table.identifier
|
||||
|
||||
legacy_sql = 'SELECT * FROM ['\
|
||||
+ standard_to_legacy(table) + '$__PARTITIONS_SUMMARY__]'
|
||||
|
||||
sql = self._add_query_comment(legacy_sql)
|
||||
# auto_begin is ignored on bigquery, and only included for consistency
|
||||
_, iterator =\
|
||||
self.raw_execute(sql, fetch='fetch_result', use_legacy_sql=True)
|
||||
return self.get_table_from_response(iterator)
|
||||
|
||||
def create_bigquery_table(self, database, schema, table_name, callback,
|
||||
sql):
|
||||
"""Create a bigquery table. The caller must supply a callback
|
||||
that takes one argument, a `google.cloud.bigquery.Table`, and mutates
|
||||
it.
|
||||
"""
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
view_ref = self.table_ref(database, schema, table_name, conn)
|
||||
view = google.cloud.bigquery.Table(view_ref)
|
||||
callback(view)
|
||||
|
||||
def fn():
|
||||
return client.create_table(view)
|
||||
self._retry_and_handle(msg=sql, conn=conn, fn=fn)
|
||||
|
||||
def create_view(self, database, schema, table_name, sql):
|
||||
def callback(table):
|
||||
table.view_query = sql
|
||||
table.view_use_legacy_sql = False
|
||||
|
||||
self.create_bigquery_table(database, schema, table_name, callback, sql)
|
||||
|
||||
def create_table(self, database, schema, table_name, sql):
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
table_ref = self.table_ref(database, schema, table_name, conn)
|
||||
job_params = {'destination': table_ref,
|
||||
'write_disposition': WRITE_TRUNCATE}
|
||||
|
||||
timeout = self.get_timeout(conn)
|
||||
|
||||
def fn():
|
||||
return self._query_and_results(client, sql, conn, job_params,
|
||||
timeout=timeout)
|
||||
self._retry_and_handle(msg=sql, conn=conn, fn=fn)
|
||||
|
||||
def create_date_partitioned_table(self, database, schema, table_name):
|
||||
def callback(table):
|
||||
table.partitioning_type = 'DAY'
|
||||
|
||||
self.create_bigquery_table(database, schema, table_name, callback,
|
||||
'CREATE DAY PARTITIONED TABLE')
|
||||
|
||||
def copy_bq_table(self, source, destination, write_disposition):
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
# -------------------------------------------------------------------------------
|
||||
# BigQuery allows the copy API to be called in two different formats:
|
||||
# 1. client.copy_table(source_table_id, destination_table_id)
|
||||
# where source_table_id = "your-project.source_dataset.source_table"
|
||||
# 2. client.copy_table(source_table_ids, destination_table_id)
|
||||
# where source_table_ids = ["your-project.your_dataset.your_table_name", ...]
|
||||
# Let's use uniform function call and always pass list there
|
||||
# -------------------------------------------------------------------------------
|
||||
if type(source) is not list:
|
||||
source = [source]
|
||||
|
||||
source_ref_array = [self.table_ref(
|
||||
src_table.database, src_table.schema, src_table.table, conn)
|
||||
for src_table in source]
|
||||
destination_ref = self.table_ref(
|
||||
destination.database, destination.schema, destination.table, conn)
|
||||
|
||||
logger.debug(
|
||||
'Copying table(s) "{}" to "{}" with disposition: "{}"',
|
||||
', '.join(source_ref.path for source_ref in source_ref_array),
|
||||
destination_ref.path, write_disposition)
|
||||
|
||||
def copy_and_results():
|
||||
job_config = google.cloud.bigquery.CopyJobConfig(
|
||||
write_disposition=write_disposition)
|
||||
copy_job = client.copy_table(
|
||||
source_ref_array, destination_ref, job_config=job_config)
|
||||
iterator = copy_job.result(timeout=self.get_timeout(conn))
|
||||
return copy_job, iterator
|
||||
|
||||
self._retry_and_handle(
|
||||
msg='copy table "{}" to "{}"'.format(
|
||||
', '.join(source_ref.path for source_ref in source_ref_array),
|
||||
destination_ref.path),
|
||||
conn=conn, fn=copy_and_results)
|
||||
|
||||
@staticmethod
|
||||
def dataset(database, schema, conn):
|
||||
dataset_ref = conn.handle.dataset(schema, database)
|
||||
return google.cloud.bigquery.Dataset(dataset_ref)
|
||||
|
||||
@staticmethod
|
||||
def dataset_from_id(dataset_id):
|
||||
return google.cloud.bigquery.Dataset.from_string(dataset_id)
|
||||
|
||||
def table_ref(self, database, schema, table_name, conn):
|
||||
dataset = self.dataset(database, schema, conn)
|
||||
return dataset.table(table_name)
|
||||
|
||||
def get_bq_table(self, database, schema, identifier):
|
||||
"""Get a bigquery table for a schema/model."""
|
||||
conn = self.get_thread_connection()
|
||||
table_ref = self.table_ref(database, schema, identifier, conn)
|
||||
return conn.handle.get_table(table_ref)
|
||||
|
||||
def drop_dataset(self, database, schema):
|
||||
conn = self.get_thread_connection()
|
||||
dataset = self.dataset(database, schema, conn)
|
||||
client = conn.handle
|
||||
|
||||
def fn():
|
||||
return client.delete_dataset(
|
||||
dataset, delete_contents=True, not_found_ok=True)
|
||||
|
||||
self._retry_and_handle(
|
||||
msg='drop dataset', conn=conn, fn=fn)
|
||||
|
||||
def create_dataset(self, database, schema):
|
||||
conn = self.get_thread_connection()
|
||||
client = conn.handle
|
||||
dataset = self.dataset(database, schema, conn)
|
||||
|
||||
def fn():
|
||||
return client.create_dataset(dataset, exists_ok=True)
|
||||
self._retry_and_handle(msg='create dataset', conn=conn, fn=fn)
|
||||
|
||||
    def _query_and_results(self, client, sql, conn, job_params, timeout=None):
        """Query the client and wait for results."""
        # Cannot reuse job_config if destination is set and ddl is used
        job_config = google.cloud.bigquery.QueryJobConfig(**job_params)
        query_job = client.query(sql, job_config=job_config)
        iterator = query_job.result(timeout=timeout)

        return query_job, iterator

    def _retry_and_handle(self, msg, conn, fn):
        """retry a function call within the context of exception_handler."""
        def reopen_conn_on_error(error):
            if isinstance(error, REOPENABLE_ERRORS):
                logger.warning('Reopening connection after {!r}', error)
                self.close(conn)
                self.open(conn)
                return

        with self.exception_handler(msg):
            return retry.retry_target(
                target=fn,
                predicate=_ErrorCounter(self.get_retries(conn)).count_error,
                sleep_generator=self._retry_generator(),
                deadline=None,
                on_error=reopen_conn_on_error)

    def _retry_generator(self):
        """Generates retry intervals that exponentially back off."""
        return retry.exponential_sleep_generator(
            initial=self.DEFAULT_INITIAL_DELAY,
            maximum=self.DEFAULT_MAXIMUM_DELAY)

    def _labels_from_query_comment(self, comment: str) -> Dict:
        try:
            comment_labels = json.loads(comment)
        except (TypeError, ValueError):
            return {'query_comment': _sanitize_label(comment)}
        return {
            _sanitize_label(key): _sanitize_label(str(value))
            for key, value in comment_labels.items()
        }


class _ErrorCounter(object):
    """Counts errors seen up to a threshold then raises the next error."""

    def __init__(self, retries):
        self.retries = retries
        self.error_count = 0

    def count_error(self, error):
        if self.retries == 0:
            return False  # Don't log
        self.error_count += 1
        if _is_retryable(error) and self.error_count <= self.retries:
            logger.debug(
                'Retry attempt {} of {} after error: {}',
                self.error_count, self.retries, repr(error))
            return True
        else:
            return False


def _is_retryable(error):
    """Return true for errors that are unlikely to occur again if retried."""
    if isinstance(error, RETRYABLE_ERRORS):
        return True
    elif isinstance(error, google.api_core.exceptions.Forbidden) and any(
            e['reason'] == 'rateLimitExceeded' for e in error.errors):
        return True
    return False


_SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]")

_VALIDATE_LABEL_LENGTH_LIMIT = 63


def _sanitize_label(value: str) -> str:
    """Return a legal value for a BigQuery label."""
    value = value.strip().lower()
    value = _SANITIZE_LABEL_PATTERN.sub("_", value)
    value_length = len(value)
    if value_length > _VALIDATE_LABEL_LENGTH_LIMIT:
        error_msg = (
            f"Job label length {value_length} is greater than length limit: "
            f"{_VALIDATE_LABEL_LENGTH_LIMIT}\n"
            f"Current sanitized label: {value}"
        )
        raise RuntimeException(error_msg)
    else:
        return value
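
As a quick illustration of the label handling above: a query comment is parsed as JSON when possible, and every key and value is lower-cased, stripped, limited to the characters [a-z0-9_-], and capped at 63 characters. The sketch below re-implements those rules standalone so it runs without dbt installed; the helper names here are made up for illustration.

import json
import re

# Mirrors the sanitization rules shown above (illustrative re-implementation).
_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]")
_LABEL_LENGTH_LIMIT = 63


def sanitize_label(value: str) -> str:
    value = _LABEL_PATTERN.sub("_", value.strip().lower())
    if len(value) > _LABEL_LENGTH_LIMIT:
        raise ValueError(f"label too long: {value}")
    return value


def labels_from_query_comment(comment: str) -> dict:
    # A JSON comment becomes one label per key; anything else becomes a
    # single 'query_comment' label.
    try:
        parsed = json.loads(comment)
    except (TypeError, ValueError):
        return {"query_comment": sanitize_label(comment)}
    return {sanitize_label(k): sanitize_label(str(v)) for k, v in parsed.items()}


print(labels_from_query_comment('{"App": "dbt", "Invocation ID": "abc-123"}'))
# {'app': 'dbt', 'invocation_id': 'abc-123'}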
@@ -1,885 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Any, Set, Union
|
||||
from dbt.dataclass_schema import dbtClassMixin, ValidationError
|
||||
|
||||
import dbt.deprecations
|
||||
import dbt.exceptions
|
||||
import dbt.flags as flags
|
||||
import dbt.clients.gcloud
|
||||
import dbt.clients.agate_helper
|
||||
|
||||
from dbt import ui
|
||||
from dbt.adapters.base import (
|
||||
BaseAdapter, available, RelationType, SchemaSearchMap, AdapterConfig
|
||||
)
|
||||
from dbt.adapters.bigquery.relation import BigQueryRelation
|
||||
from dbt.adapters.bigquery import BigQueryColumn
|
||||
from dbt.adapters.bigquery import BigQueryConnectionManager
|
||||
from dbt.contracts.connection import Connection
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
from dbt.logger import GLOBAL_LOGGER as logger, print_timestamped_line
|
||||
from dbt.utils import filter_null_values
|
||||
|
||||
import google.auth
|
||||
import google.api_core
|
||||
import google.oauth2
|
||||
import google.cloud.exceptions
|
||||
import google.cloud.bigquery
|
||||
|
||||
from google.cloud.bigquery import AccessEntry, SchemaField
|
||||
|
||||
import time
|
||||
import agate
|
||||
import json
|
||||
|
||||
# Write dispositions for bigquery.
|
||||
WRITE_APPEND = google.cloud.bigquery.job.WriteDisposition.WRITE_APPEND
|
||||
WRITE_TRUNCATE = google.cloud.bigquery.job.WriteDisposition.WRITE_TRUNCATE
|
||||
|
||||
|
||||
def sql_escape(string):
|
||||
if not isinstance(string, str):
|
||||
dbt.exceptions.raise_compiler_exception(
|
||||
f'cannot escape a non-string: {string}'
|
||||
)
|
||||
|
||||
return json.dumps(string)[1:-1]
|
||||
|
||||
|
||||
@dataclass
|
||||
class PartitionConfig(dbtClassMixin):
|
||||
field: str
|
||||
data_type: str = 'date'
|
||||
granularity: str = 'day'
|
||||
range: Optional[Dict[str, Any]] = None
|
||||
|
||||
def render(self, alias: Optional[str] = None):
|
||||
column: str = self.field
|
||||
if alias:
|
||||
column = f'{alias}.{self.field}'
|
||||
|
||||
if self.data_type.lower() == 'int64' or (
|
||||
self.data_type.lower() == 'date' and
|
||||
self.granularity.lower() == 'day'
|
||||
):
|
||||
return column
|
||||
else:
|
||||
return f'{self.data_type}_trunc({column}, {self.granularity})'
|
||||
|
||||
@classmethod
|
||||
def parse(cls, raw_partition_by) -> Optional['PartitionConfig']:
|
||||
if raw_partition_by is None:
|
||||
return None
|
||||
try:
|
||||
cls.validate(raw_partition_by)
|
||||
return cls.from_dict(raw_partition_by)
|
||||
except ValidationError as exc:
|
||||
msg = dbt.exceptions.validator_error_message(exc)
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
f'Could not parse partition config: {msg}'
|
||||
)
|
||||
except TypeError:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
f'Invalid partition_by config:\n'
|
||||
f' Got: {raw_partition_by}\n'
|
||||
f' Expected a dictionary with "field" and "data_type" keys'
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GrantTarget(dbtClassMixin):
|
||||
dataset: str
|
||||
project: str
|
||||
|
||||
def render(self):
|
||||
return f'{self.project}.{self.dataset}'
|
||||
|
||||
|
||||
def _stub_relation(*args, **kwargs):
|
||||
return BigQueryRelation.create(
|
||||
database='',
|
||||
schema='',
|
||||
identifier='',
|
||||
quote_policy={},
|
||||
type=BigQueryRelation.Table
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class BigqueryConfig(AdapterConfig):
|
||||
cluster_by: Optional[Union[List[str], str]] = None
|
||||
partition_by: Optional[Dict[str, Any]] = None
|
||||
kms_key_name: Optional[str] = None
|
||||
labels: Optional[Dict[str, str]] = None
|
||||
partitions: Optional[List[str]] = None
|
||||
grant_access_to: Optional[List[Dict[str, str]]] = None
|
||||
hours_to_expiration: Optional[int] = None
|
||||
require_partition_filter: Optional[bool] = None
|
||||
partition_expiration_days: Optional[int] = None
|
||||
merge_update_columns: Optional[str] = None
|
||||
|
||||
|
||||
class BigQueryAdapter(BaseAdapter):
|
||||
|
||||
RELATION_TYPES = {
|
||||
'TABLE': RelationType.Table,
|
||||
'VIEW': RelationType.View,
|
||||
'EXTERNAL': RelationType.External
|
||||
}
|
||||
|
||||
Relation = BigQueryRelation
|
||||
Column = BigQueryColumn
|
||||
ConnectionManager = BigQueryConnectionManager
|
||||
|
||||
AdapterSpecificConfigs = BigqueryConfig
|
||||
|
||||
###
|
||||
# Implementations of abstract methods
|
||||
###
|
||||
|
||||
@classmethod
|
||||
def date_function(cls) -> str:
|
||||
return 'CURRENT_TIMESTAMP()'
|
||||
|
||||
@classmethod
|
||||
def is_cancelable(cls) -> bool:
|
||||
return False
|
||||
|
||||
def drop_relation(self, relation: BigQueryRelation) -> None:
|
||||
is_cached = self._schema_is_cached(relation.database, relation.schema)
|
||||
if is_cached:
|
||||
self.cache_dropped(relation)
|
||||
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
dataset = self.connections.dataset(relation.database, relation.schema,
|
||||
conn)
|
||||
relation_object = dataset.table(relation.identifier)
|
||||
client.delete_table(relation_object)
|
||||
|
||||
def truncate_relation(self, relation: BigQueryRelation) -> None:
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`truncate` is not implemented for this adapter!'
|
||||
)
|
||||
|
||||
def rename_relation(
|
||||
self, from_relation: BigQueryRelation, to_relation: BigQueryRelation
|
||||
) -> None:
|
||||
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
from_table_ref = self.connections.table_ref(from_relation.database,
|
||||
from_relation.schema,
|
||||
from_relation.identifier,
|
||||
conn)
|
||||
from_table = client.get_table(from_table_ref)
|
||||
if from_table.table_type == "VIEW" or \
|
||||
from_relation.type == RelationType.View or \
|
||||
to_relation.type == RelationType.View:
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
'Renaming of views is not currently supported in BigQuery'
|
||||
)
|
||||
|
||||
to_table_ref = self.connections.table_ref(to_relation.database,
|
||||
to_relation.schema,
|
||||
to_relation.identifier,
|
||||
conn)
|
||||
|
||||
self.cache_renamed(from_relation, to_relation)
|
||||
client.copy_table(from_table_ref, to_table_ref)
|
||||
client.delete_table(from_table_ref)
|
||||
|
||||
@available
|
||||
def list_schemas(self, database: str) -> List[str]:
|
||||
# the database string we get here is potentially quoted. Strip that off
|
||||
# for the API call.
|
||||
database = database.strip('`')
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
def query_schemas():
|
||||
# this is similar to how we have to deal with listing tables
|
||||
all_datasets = client.list_datasets(project=database,
|
||||
max_results=10000)
|
||||
return [ds.dataset_id for ds in all_datasets]
|
||||
|
||||
return self.connections._retry_and_handle(
|
||||
msg='list dataset', conn=conn, fn=query_schemas)
|
||||
|
||||
@available.parse(lambda *a, **k: False)
|
||||
def check_schema_exists(self, database: str, schema: str) -> bool:
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
bigquery_dataset = self.connections.dataset(
|
||||
database, schema, conn
|
||||
)
|
||||
# try to do things with the dataset. If it doesn't exist it will 404.
|
||||
# we have to do it this way to handle underscore-prefixed datasets,
|
||||
# which appear in neither the information_schema.schemata view nor the
|
||||
# list_datasets method.
|
||||
try:
|
||||
next(iter(client.list_tables(bigquery_dataset, max_results=1)))
|
||||
except StopIteration:
|
||||
pass
|
||||
except google.api_core.exceptions.NotFound:
|
||||
# the schema does not exist
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_columns_in_relation(
|
||||
self, relation: BigQueryRelation
|
||||
) -> List[BigQueryColumn]:
|
||||
try:
|
||||
table = self.connections.get_bq_table(
|
||||
database=relation.database,
|
||||
schema=relation.schema,
|
||||
identifier=relation.identifier
|
||||
)
|
||||
return self._get_dbt_columns_from_bq_table(table)
|
||||
|
||||
except (ValueError, google.cloud.exceptions.NotFound) as e:
|
||||
logger.debug("get_columns_in_relation error: {}".format(e))
|
||||
return []
|
||||
|
||||
def expand_column_types(
|
||||
self, goal: BigQueryRelation, current: BigQueryRelation
|
||||
) -> None:
|
||||
# This is a no-op on BigQuery
|
||||
pass
|
||||
|
||||
def expand_target_column_types(
|
||||
self, from_relation: BigQueryRelation, to_relation: BigQueryRelation
|
||||
) -> None:
|
||||
# This is a no-op on BigQuery
|
||||
pass
|
||||
|
||||
@available.parse_list
|
||||
def list_relations_without_caching(
|
||||
self, schema_relation: BigQueryRelation
|
||||
) -> List[BigQueryRelation]:
|
||||
connection = self.connections.get_thread_connection()
|
||||
client = connection.handle
|
||||
|
||||
bigquery_dataset = self.connections.dataset(
|
||||
schema_relation.database, schema_relation.schema, connection
|
||||
)
|
||||
|
||||
all_tables = client.list_tables(
|
||||
bigquery_dataset,
|
||||
# BigQuery paginates tables by alphabetizing them, and using
|
||||
# the name of the last table on a page as the key for the
|
||||
# next page. If that key table gets dropped before we run
|
||||
# list_relations, then this will 404. So, we avoid this
|
||||
# situation by making the page size sufficiently large.
|
||||
# see: https://github.com/dbt-labs/dbt/issues/726
|
||||
# TODO: cache the list of relations up front, and then we
|
||||
# won't need to do this
|
||||
max_results=100000)
|
||||
|
||||
# This will 404 if the dataset does not exist. This behavior mirrors
|
||||
# the implementation of list_relations for other adapters
|
||||
try:
|
||||
return [self._bq_table_to_relation(table) for table in all_tables]
|
||||
except google.api_core.exceptions.NotFound:
|
||||
return []
|
||||
|
||||
def get_relation(
|
||||
self, database: str, schema: str, identifier: str
|
||||
) -> BigQueryRelation:
|
||||
if self._schema_is_cached(database, schema):
|
||||
# if it's in the cache, use the parent's model of going through
|
||||
# the relations cache and picking out the relation
|
||||
return super().get_relation(
|
||||
database=database,
|
||||
schema=schema,
|
||||
identifier=identifier
|
||||
)
|
||||
|
||||
try:
|
||||
table = self.connections.get_bq_table(database, schema, identifier)
|
||||
except google.api_core.exceptions.NotFound:
|
||||
table = None
|
||||
return self._bq_table_to_relation(table)
|
||||
|
||||
def create_schema(self, relation: BigQueryRelation) -> None:
|
||||
database = relation.database
|
||||
schema = relation.schema
|
||||
logger.debug('Creating schema "{}.{}".', database, schema)
|
||||
self.connections.create_dataset(database, schema)
|
||||
|
||||
def drop_schema(self, relation: BigQueryRelation) -> None:
|
||||
database = relation.database
|
||||
schema = relation.schema
|
||||
logger.debug('Dropping schema "{}.{}".', database, schema)
|
||||
self.connections.drop_dataset(database, schema)
|
||||
self.cache.drop_schema(database, schema)
|
||||
|
||||
@classmethod
|
||||
def quote(cls, identifier: str) -> str:
|
||||
return '`{}`'.format(identifier)
|
||||
|
||||
@classmethod
|
||||
def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
|
||||
return "string"
|
||||
|
||||
@classmethod
|
||||
def convert_number_type(
|
||||
cls, agate_table: agate.Table, col_idx: int
|
||||
) -> str:
|
||||
decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
|
||||
return "float64" if decimals else "int64"
|
||||
|
||||
@classmethod
|
||||
def convert_boolean_type(
|
||||
cls, agate_table: agate.Table, col_idx: int
|
||||
) -> str:
|
||||
return "bool"
|
||||
|
||||
@classmethod
|
||||
def convert_datetime_type(
|
||||
cls, agate_table: agate.Table, col_idx: int
|
||||
) -> str:
|
||||
return "datetime"
|
||||
|
||||
@classmethod
|
||||
def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
|
||||
return "date"
|
||||
|
||||
@classmethod
|
||||
def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
|
||||
return "time"
|
||||
|
||||
###
|
||||
# Implementation details
|
||||
###
|
||||
def _make_match_kwargs(
|
||||
self, database: str, schema: str, identifier: str
|
||||
) -> Dict[str, str]:
|
||||
return filter_null_values({
|
||||
'database': database,
|
||||
'identifier': identifier,
|
||||
'schema': schema,
|
||||
})
|
||||
|
||||
def _get_dbt_columns_from_bq_table(self, table) -> List[BigQueryColumn]:
|
||||
"Translates BQ SchemaField dicts into dbt BigQueryColumn objects"
|
||||
|
||||
columns = []
|
||||
for col in table.schema:
|
||||
# BigQuery returns type labels that are not valid type specifiers
|
||||
dtype = self.Column.translate_type(col.field_type)
|
||||
column = self.Column(
|
||||
col.name, dtype, col.fields, col.mode)
|
||||
columns.append(column)
|
||||
|
||||
return columns
|
||||
|
||||
def _agate_to_schema(
|
||||
self, agate_table: agate.Table, column_override: Dict[str, str]
|
||||
) -> List[SchemaField]:
|
||||
"""Convert agate.Table with column names to a list of bigquery schemas.
|
||||
"""
|
||||
bq_schema = []
|
||||
for idx, col_name in enumerate(agate_table.column_names):
|
||||
inferred_type = self.convert_agate_type(agate_table, idx)
|
||||
type_ = column_override.get(col_name, inferred_type)
|
||||
bq_schema.append(SchemaField(col_name, type_))
|
||||
return bq_schema
|
||||
|
||||
def _materialize_as_view(self, model: Dict[str, Any]) -> str:
|
||||
model_database = model.get('database')
|
||||
model_schema = model.get('schema')
|
||||
model_alias = model.get('alias')
|
||||
model_sql = model.get('compiled_sql')
|
||||
|
||||
logger.debug("Model SQL ({}):\n{}".format(model_alias, model_sql))
|
||||
self.connections.create_view(
|
||||
database=model_database,
|
||||
schema=model_schema,
|
||||
table_name=model_alias,
|
||||
sql=model_sql
|
||||
)
|
||||
return "CREATE VIEW"
|
||||
|
||||
def _materialize_as_table(
|
||||
self,
|
||||
model: Dict[str, Any],
|
||||
model_sql: str,
|
||||
decorator: Optional[str] = None,
|
||||
) -> str:
|
||||
model_database = model.get('database')
|
||||
model_schema = model.get('schema')
|
||||
model_alias = model.get('alias')
|
||||
|
||||
if decorator is None:
|
||||
table_name = model_alias
|
||||
else:
|
||||
table_name = "{}${}".format(model_alias, decorator)
|
||||
|
||||
logger.debug("Model SQL ({}):\n{}".format(table_name, model_sql))
|
||||
self.connections.create_table(
|
||||
database=model_database,
|
||||
schema=model_schema,
|
||||
table_name=table_name,
|
||||
sql=model_sql
|
||||
)
|
||||
|
||||
return "CREATE TABLE"
|
||||
|
||||
@available.parse(lambda *a, **k: '')
|
||||
def copy_table(self, source, destination, materialization):
|
||||
if materialization == 'incremental':
|
||||
write_disposition = WRITE_APPEND
|
||||
elif materialization == 'table':
|
||||
write_disposition = WRITE_TRUNCATE
|
||||
else:
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
'Copy table materialization must be "copy" or "table", but '
|
||||
f"config.get('copy_materialization', 'table') was "
|
||||
f'{materialization}')
|
||||
|
||||
self.connections.copy_bq_table(
|
||||
source, destination, write_disposition)
|
||||
|
||||
return "COPY TABLE with materialization: {}".format(materialization)
|
||||
|
||||
@classmethod
|
||||
def poll_until_job_completes(cls, job, timeout):
|
||||
retry_count = timeout
|
||||
|
||||
while retry_count > 0 and job.state != 'DONE':
|
||||
retry_count -= 1
|
||||
time.sleep(1)
|
||||
job.reload()
|
||||
|
||||
if job.state != 'DONE':
|
||||
raise dbt.exceptions.RuntimeException("BigQuery Timeout Exceeded")
|
||||
|
||||
elif job.error_result:
|
||||
message = '\n'.join(
|
||||
error['message'].strip() for error in job.errors
|
||||
)
|
||||
raise dbt.exceptions.RuntimeException(message)
|
||||
|
||||
def _bq_table_to_relation(self, bq_table):
|
||||
if bq_table is None:
|
||||
return None
|
||||
|
||||
return self.Relation.create(
|
||||
database=bq_table.project,
|
||||
schema=bq_table.dataset_id,
|
||||
identifier=bq_table.table_id,
|
||||
quote_policy={
|
||||
'schema': True,
|
||||
'identifier': True
|
||||
},
|
||||
type=self.RELATION_TYPES.get(
|
||||
bq_table.table_type, RelationType.External
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def warning_on_hooks(hook_type):
|
||||
msg = "{} is not supported in bigquery and will be ignored"
|
||||
print_timestamped_line(
|
||||
msg.format(hook_type), ui.COLOR_FG_YELLOW
|
||||
)
|
||||
|
||||
@available
|
||||
def add_query(self, sql, auto_begin=True, bindings=None,
|
||||
abridge_sql_log=False):
|
||||
if self.nice_connection_name() in ['on-run-start', 'on-run-end']:
|
||||
self.warning_on_hooks(self.nice_connection_name())
|
||||
else:
|
||||
raise dbt.exceptions.NotImplementedException(
|
||||
'`add_query` is not implemented for this adapter!')
|
||||
|
||||
###
|
||||
# Special bigquery adapter methods
|
||||
###
|
||||
@available.parse_none
|
||||
def make_date_partitioned_table(self, relation):
|
||||
return self.connections.create_date_partitioned_table(
|
||||
database=relation.database,
|
||||
schema=relation.schema,
|
||||
table_name=relation.identifier
|
||||
)
|
||||
|
||||
@available.parse(lambda *a, **k: '')
|
||||
def execute_model(self, model, materialization, sql_override=None,
|
||||
decorator=None):
|
||||
|
||||
if sql_override is None:
|
||||
sql_override = model.get('compiled_sql')
|
||||
|
||||
if flags.STRICT_MODE:
|
||||
connection = self.connections.get_thread_connection()
|
||||
if not isinstance(connection, Connection):
|
||||
dbt.exceptions.raise_compiler_error(
|
||||
f'Got {connection} - not a Connection!'
|
||||
)
|
||||
model_uid = model.get('unique_id')
|
||||
if connection.name != model_uid:
|
||||
raise dbt.exceptions.InternalException(
|
||||
f'Connection had name "{connection.name}", expected model '
|
||||
f'unique id of "{model_uid}"'
|
||||
)
|
||||
|
||||
if materialization == 'view':
|
||||
res = self._materialize_as_view(model)
|
||||
elif materialization == 'table':
|
||||
res = self._materialize_as_table(model, sql_override, decorator)
|
||||
else:
|
||||
msg = "Invalid relation type: '{}'".format(materialization)
|
||||
raise dbt.exceptions.RuntimeException(msg, model)
|
||||
|
||||
return res
|
||||
|
||||
def _partitions_match(
|
||||
self, table, conf_partition: Optional[PartitionConfig]
|
||||
) -> bool:
|
||||
"""
|
||||
Check if the actual and configured partitions for a table are a match.
|
||||
BigQuery tables can be replaced if:
|
||||
- Both tables are not partitioned, OR
|
||||
- Both tables are partitioned using the exact same configs
|
||||
|
||||
If there is a mismatch, then the table cannot be replaced directly.
|
||||
"""
|
||||
is_partitioned = (table.range_partitioning or table.time_partitioning)
|
||||
|
||||
if not is_partitioned and not conf_partition:
|
||||
return True
|
||||
elif conf_partition and table.time_partitioning is not None:
|
||||
table_field = table.time_partitioning.field.lower()
|
||||
table_granularity = table.partitioning_type.lower()
|
||||
return table_field == conf_partition.field.lower() \
|
||||
and table_granularity == conf_partition.granularity.lower()
|
||||
elif conf_partition and table.range_partitioning is not None:
|
||||
dest_part = table.range_partitioning
|
||||
conf_part = conf_partition.range or {}
|
||||
|
||||
return dest_part.field == conf_partition.field \
|
||||
and dest_part.range_.start == conf_part.get('start') \
|
||||
and dest_part.range_.end == conf_part.get('end') \
|
||||
and dest_part.range_.interval == conf_part.get('interval')
|
||||
else:
|
||||
return False
|
||||
|
||||
def _clusters_match(self, table, conf_cluster) -> bool:
|
||||
"""
|
||||
Check if the actual and configured clustering columns for a table
|
||||
are a match. BigQuery tables can be replaced if clustering columns
|
||||
match exactly.
|
||||
"""
|
||||
if isinstance(conf_cluster, str):
|
||||
conf_cluster = [conf_cluster]
|
||||
|
||||
return table.clustering_fields == conf_cluster
|
||||
|
||||
@available.parse(lambda *a, **k: True)
|
||||
def is_replaceable(
|
||||
self,
|
||||
relation,
|
||||
conf_partition: Optional[PartitionConfig],
|
||||
conf_cluster
|
||||
) -> bool:
|
||||
"""
|
||||
Check if a given partition and clustering column spec for a table
|
||||
can replace an existing relation in the database. BigQuery does not
|
||||
allow tables to be replaced with another table that has a different
|
||||
partitioning spec. This method returns True if the given config spec is
|
||||
identical to that of the existing table.
|
||||
"""
|
||||
if not relation:
|
||||
return True
|
||||
|
||||
try:
|
||||
table = self.connections.get_bq_table(
|
||||
database=relation.database,
|
||||
schema=relation.schema,
|
||||
identifier=relation.identifier
|
||||
)
|
||||
except google.cloud.exceptions.NotFound:
|
||||
return True
|
||||
|
||||
return all((
|
||||
self._partitions_match(table, conf_partition),
|
||||
self._clusters_match(table, conf_cluster)
|
||||
))
|
||||
|
||||
@available
|
||||
def parse_partition_by(
|
||||
self, raw_partition_by: Any
|
||||
) -> Optional[PartitionConfig]:
|
||||
"""
|
||||
dbt v0.16.0 expects `partition_by` to be a dictionary where previously
|
||||
it was a string. Check the type of `partition_by`, raise error
|
||||
or warning if string, and attempt to convert to dict.
|
||||
"""
|
||||
return PartitionConfig.parse(raw_partition_by)
|
||||
|
||||
def get_table_ref_from_relation(self, conn, relation):
|
||||
return self.connections.table_ref(relation.database,
|
||||
relation.schema,
|
||||
relation.identifier,
|
||||
conn)
|
||||
|
||||
def _update_column_dict(self, bq_column_dict, dbt_columns, parent=''):
|
||||
"""
|
||||
Helper function to recursively traverse the schema of a table in the
|
||||
update_column_descriptions function below.
|
||||
|
||||
bq_column_dict should be a dict as obtained by the to_api_repr()
|
||||
function of a SchemaField object.
|
||||
"""
|
||||
if parent:
|
||||
dotted_column_name = '{}.{}'.format(parent, bq_column_dict['name'])
|
||||
else:
|
||||
dotted_column_name = bq_column_dict['name']
|
||||
|
||||
if dotted_column_name in dbt_columns:
|
||||
column_config = dbt_columns[dotted_column_name]
|
||||
bq_column_dict['description'] = column_config.get('description')
|
||||
if column_config.get('policy_tags'):
|
||||
bq_column_dict['policyTags'] = {
|
||||
'names': column_config.get('policy_tags')
|
||||
}
|
||||
|
||||
new_fields = []
|
||||
for child_col_dict in bq_column_dict.get('fields', list()):
|
||||
new_child_column_dict = self._update_column_dict(
|
||||
child_col_dict,
|
||||
dbt_columns,
|
||||
parent=dotted_column_name
|
||||
)
|
||||
new_fields.append(new_child_column_dict)
|
||||
|
||||
bq_column_dict['fields'] = new_fields
|
||||
|
||||
return bq_column_dict
|
||||
|
||||
@available.parse_none
|
||||
def update_columns(self, relation, columns):
|
||||
if len(columns) == 0:
|
||||
return
|
||||
|
||||
conn = self.connections.get_thread_connection()
|
||||
table_ref = self.get_table_ref_from_relation(conn, relation)
|
||||
table = conn.handle.get_table(table_ref)
|
||||
|
||||
new_schema = []
|
||||
for bq_column in table.schema:
|
||||
bq_column_dict = bq_column.to_api_repr()
|
||||
new_bq_column_dict = self._update_column_dict(
|
||||
bq_column_dict,
|
||||
columns
|
||||
)
|
||||
new_schema.append(SchemaField.from_api_repr(new_bq_column_dict))
|
||||
|
||||
new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
|
||||
conn.handle.update_table(new_table, ['schema'])
|
||||
|
||||
@available.parse_none
|
||||
def update_table_description(
|
||||
self, database: str, schema: str, identifier: str, description: str
|
||||
):
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
table_ref = self.connections.table_ref(
|
||||
database,
|
||||
schema,
|
||||
identifier,
|
||||
conn
|
||||
)
|
||||
table = client.get_table(table_ref)
|
||||
table.description = description
|
||||
client.update_table(table, ['description'])
|
||||
|
||||
@available.parse_none
|
||||
def alter_table_add_columns(self, relation, columns):
|
||||
|
||||
logger.debug('Adding columns ({}) to table {}".'.format(
|
||||
columns, relation))
|
||||
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
table_ref = self.connections.table_ref(relation.database,
|
||||
relation.schema,
|
||||
relation.identifier, conn)
|
||||
table = client.get_table(table_ref)
|
||||
|
||||
new_columns = [col.column_to_bq_schema() for col in columns]
|
||||
new_schema = table.schema + new_columns
|
||||
|
||||
new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
|
||||
client.update_table(new_table, ['schema'])
|
||||
|
||||
@available.parse_none
|
||||
def load_dataframe(self, database, schema, table_name, agate_table,
|
||||
column_override):
|
||||
bq_schema = self._agate_to_schema(agate_table, column_override)
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
table = self.connections.table_ref(database, schema, table_name, conn)
|
||||
|
||||
load_config = google.cloud.bigquery.LoadJobConfig()
|
||||
load_config.skip_leading_rows = 1
|
||||
load_config.schema = bq_schema
|
||||
|
||||
with open(agate_table.original_abspath, "rb") as f:
|
||||
job = client.load_table_from_file(f, table, rewind=True,
|
||||
job_config=load_config)
|
||||
|
||||
timeout = self.connections.get_timeout(conn)
|
||||
with self.connections.exception_handler("LOAD TABLE"):
|
||||
self.poll_until_job_completes(job, timeout)
|
||||
|
||||
@classmethod
|
||||
def _catalog_filter_table(
|
||||
cls, table: agate.Table, manifest: Manifest
|
||||
) -> agate.Table:
|
||||
table = table.rename(column_names={
|
||||
col.name: col.name.replace('__', ':') for col in table.columns
|
||||
})
|
||||
return super()._catalog_filter_table(table, manifest)
|
||||
|
||||
def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap:
|
||||
candidates = super()._get_catalog_schemas(manifest)
|
||||
db_schemas: Dict[str, Set[str]] = {}
|
||||
result = SchemaSearchMap()
|
||||
|
||||
for candidate, schemas in candidates.items():
|
||||
database = candidate.database
|
||||
if database not in db_schemas:
|
||||
db_schemas[database] = set(self.list_schemas(database))
|
||||
if candidate.schema in db_schemas[database]:
|
||||
result[candidate] = schemas
|
||||
else:
|
||||
logger.debug(
|
||||
'Skipping catalog for {}.{} - schema does not exist'
|
||||
.format(database, candidate.schema)
|
||||
)
|
||||
return result
|
||||
|
||||
@available.parse(lambda *a, **k: {})
|
||||
def get_common_options(
|
||||
self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
opts = {}
|
||||
|
||||
if (config.get('hours_to_expiration') is not None) and (not temporary):
|
||||
expiration = (
|
||||
'TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL '
|
||||
'{} hour)').format(config.get('hours_to_expiration'))
|
||||
opts['expiration_timestamp'] = expiration
|
||||
|
||||
if config.persist_relation_docs() and 'description' in node:
|
||||
description = sql_escape(node['description'])
|
||||
opts['description'] = '"""{}"""'.format(description)
|
||||
|
||||
if config.get('labels'):
|
||||
labels = config.get('labels', {})
|
||||
opts['labels'] = list(labels.items())
|
||||
|
||||
return opts
|
||||
|
||||
@available.parse(lambda *a, **k: {})
|
||||
def get_table_options(
|
||||
self, config: Dict[str, Any], node: Dict[str, Any], temporary: bool
|
||||
) -> Dict[str, Any]:
|
||||
opts = self.get_common_options(config, node, temporary)
|
||||
|
||||
if temporary:
|
||||
expiration = 'TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 12 hour)'
|
||||
opts['expiration_timestamp'] = expiration
|
||||
|
||||
if config.get('kms_key_name') is not None:
|
||||
opts['kms_key_name'] = "'{}'".format(config.get('kms_key_name'))
|
||||
|
||||
if config.get('require_partition_filter'):
|
||||
opts['require_partition_filter'] = config.get(
|
||||
'require_partition_filter')
|
||||
|
||||
if config.get('partition_expiration_days') is not None:
|
||||
opts['partition_expiration_days'] = config.get(
|
||||
'partition_expiration_days')
|
||||
|
||||
return opts
|
||||
|
||||
@available.parse(lambda *a, **k: {})
|
||||
def get_view_options(
|
||||
self, config: Dict[str, Any], node: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
opts = self.get_common_options(config, node)
|
||||
return opts
|
||||
|
||||
@available.parse_none
|
||||
def grant_access_to(self, entity, entity_type, role, grant_target_dict):
|
||||
"""
|
||||
Given an entity, grants it access to a permissioned dataset.
|
||||
"""
|
||||
conn = self.connections.get_thread_connection()
|
||||
client = conn.handle
|
||||
|
||||
GrantTarget.validate(grant_target_dict)
|
||||
grant_target = GrantTarget.from_dict(grant_target_dict)
|
||||
dataset = client.get_dataset(
|
||||
self.connections.dataset_from_id(grant_target.render())
|
||||
)
|
||||
|
||||
if entity_type == 'view':
|
||||
entity = self.connections.table_ref(
|
||||
entity.database,
|
||||
entity.schema,
|
||||
entity.identifier,
|
||||
conn).to_api_repr()
|
||||
|
||||
access_entry = AccessEntry(role, entity_type, entity)
|
||||
access_entries = dataset.access_entries
|
||||
|
||||
if access_entry in access_entries:
|
||||
logger.debug(f"Access entry {access_entry} "
|
||||
f"already exists in dataset")
|
||||
return
|
||||
|
||||
access_entries.append(AccessEntry(role, entity_type, entity))
|
||||
dataset.access_entries = access_entries
|
||||
client.update_dataset(dataset, ['access_entries'])
|
||||
|
||||
def get_rows_different_sql(
|
||||
self,
|
||||
relation_a: BigQueryRelation,
|
||||
relation_b: BigQueryRelation,
|
||||
column_names: Optional[List[str]] = None,
|
||||
except_operator='EXCEPT DISTINCT'
|
||||
) -> str:
|
||||
return super().get_rows_different_sql(
|
||||
relation_a=relation_a,
|
||||
relation_b=relation_b,
|
||||
column_names=column_names,
|
||||
except_operator=except_operator,
|
||||
)
|
||||
|
||||
def timestamp_add_sql(
|
||||
self, add_to: str, number: int = 1, interval: str = 'hour'
|
||||
) -> str:
|
||||
return f'timestamp_add({add_to}, interval {number} {interval})'
|
||||
|
||||
def string_add_sql(
|
||||
self, add_to: str, value: str, location='append',
|
||||
) -> str:
|
||||
if location == 'append':
|
||||
return f"concat({add_to}, '{value}')"
|
||||
elif location == 'prepend':
|
||||
return f"concat('{value}', {add_to})"
|
||||
else:
|
||||
raise dbt.exceptions.RuntimeException(
|
||||
f'Got an unexpected location value of "{location}"'
|
||||
)
|
||||
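
For context on the PartitionConfig class defined above: a date partition at day granularity renders as the bare column, while any other data type or granularity is wrapped in a <type>_trunc() call. The sketch below assumes the dbt BigQuery plugin is installed and importable; the column name and alias are made-up examples.

# Assumes the dbt-bigquery plugin shown in this diff is importable.
from dbt.adapters.bigquery.impl import PartitionConfig

# A date/day partition renders as the bare column.
p = PartitionConfig.parse({"field": "created_at", "data_type": "date"})
print(p.render())
# created_at

# Other data types or granularities wrap the column in <type>_trunc().
p = PartitionConfig.parse({
    "field": "created_at", "data_type": "timestamp", "granularity": "month"
})
print(p.render(alias="DBT_INTERNAL_DEST"))
# timestamp_trunc(DBT_INTERNAL_DEST.created_at, month)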
@@ -1,80 +0,0 @@
from dataclasses import dataclass
from typing import Optional

from dbt.adapters.base.relation import (
    BaseRelation, ComponentName, InformationSchema
)
from dbt.utils import filter_null_values
from typing import TypeVar


Self = TypeVar('Self', bound='BigQueryRelation')


@dataclass(frozen=True, eq=False, repr=False)
class BigQueryRelation(BaseRelation):
    quote_character: str = '`'

    def matches(
        self,
        database: Optional[str] = None,
        schema: Optional[str] = None,
        identifier: Optional[str] = None,
    ) -> bool:
        search = filter_null_values({
            ComponentName.Database: database,
            ComponentName.Schema: schema,
            ComponentName.Identifier: identifier
        })

        if not search:
            # nothing was passed in
            pass

        for k, v in search.items():
            if not self._is_exactish_match(k, v):
                return False

        return True

    @property
    def project(self):
        return self.database

    @property
    def dataset(self):
        return self.schema

    def information_schema(
        self, identifier: Optional[str] = None
    ) -> 'BigQueryInformationSchema':
        return BigQueryInformationSchema.from_relation(self, identifier)


@dataclass(frozen=True, eq=False, repr=False)
class BigQueryInformationSchema(InformationSchema):
    quote_character: str = '`'

    @classmethod
    def get_include_policy(cls, relation, information_schema_view):
        schema = True
        if information_schema_view in ('SCHEMATA', 'SCHEMATA_OPTIONS', None):
            schema = False

        identifier = True
        if information_schema_view == '__TABLES__':
            identifier = False

        return relation.include_policy.replace(
            schema=schema,
            identifier=identifier,
        )

    def replace(self, **kwargs):
        if 'information_schema_view' in kwargs:
            view = kwargs['information_schema_view']
            # we also need to update the include policy, unless the caller did
            # in which case it's their problem
            if 'include_policy' not in kwargs:
                kwargs['include_policy'] = self.get_include_policy(self, view)
        return super().replace(**kwargs)
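
A minimal usage sketch of the relation class above, assuming the plugin is importable; the project, dataset, and table names are made up. It shows the create() call used elsewhere in this diff, the backtick quote character, and the project/dataset aliases for database/schema.

# Assumes the dbt-bigquery plugin shown in this diff is importable.
from dbt.adapters.bigquery.relation import BigQueryRelation

rel = BigQueryRelation.create(
    database="my-gcp-project",   # illustrative names
    schema="analytics",
    identifier="orders",
    quote_policy={"schema": True, "identifier": True},
    type=BigQueryRelation.Table,
)

print(str(rel))       # renders the fully-qualified, backtick-quoted path
print(rel.project)    # alias for .database
print(rel.dataset)    # alias for .schema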
@@ -1,2 +0,0 @@
import os
PACKAGE_PATH = os.path.dirname(__file__)
@@ -1,5 +0,0 @@
config-version: 2
name: dbt_bigquery
version: 1.0

macro-paths: ["macros"]
@@ -1,196 +0,0 @@
|
||||
|
||||
{% macro partition_by(partition_config) -%}
|
||||
{%- if partition_config is none -%}
|
||||
{% do return('') %}
|
||||
{%- elif partition_config.data_type | lower in ('date','timestamp','datetime') -%}
|
||||
partition by {{ partition_config.render() }}
|
||||
{%- elif partition_config.data_type | lower in ('int64') -%}
|
||||
{%- set range = partition_config.range -%}
|
||||
partition by range_bucket(
|
||||
{{ partition_config.field }},
|
||||
generate_array({{ range.start}}, {{ range.end }}, {{ range.interval }})
|
||||
)
|
||||
{%- endif -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro cluster_by(raw_cluster_by) %}
|
||||
{%- if raw_cluster_by is not none -%}
|
||||
cluster by {% if raw_cluster_by is string -%}
|
||||
{% set raw_cluster_by = [raw_cluster_by] %}
|
||||
{%- endif -%}
|
||||
{%- for cluster in raw_cluster_by -%}
|
||||
{{ cluster }}
|
||||
{%- if not loop.last -%}, {% endif -%}
|
||||
{%- endfor -%}
|
||||
|
||||
{% endif %}
|
||||
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro bigquery_options(opts) %}
|
||||
{% set options -%}
|
||||
OPTIONS({% for opt_key, opt_val in opts.items() %}
|
||||
{{ opt_key }}={{ opt_val }}{{ "," if not loop.last }}
|
||||
{% endfor %})
|
||||
{%- endset %}
|
||||
{%- do return(options) -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro bigquery_table_options(config, node, temporary) %}
|
||||
{% set opts = adapter.get_table_options(config, node, temporary) %}
|
||||
{%- do return(bigquery_options(opts)) -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro bigquery__create_table_as(temporary, relation, sql) -%}
|
||||
{%- set raw_partition_by = config.get('partition_by', none) -%}
|
||||
{%- set raw_cluster_by = config.get('cluster_by', none) -%}
|
||||
{%- set sql_header = config.get('sql_header', none) -%}
|
||||
|
||||
{%- set partition_config = adapter.parse_partition_by(raw_partition_by) -%}
|
||||
|
||||
{{ sql_header if sql_header is not none }}
|
||||
|
||||
create or replace table {{ relation }}
|
||||
{{ partition_by(partition_config) }}
|
||||
{{ cluster_by(raw_cluster_by) }}
|
||||
{{ bigquery_table_options(config, model, temporary) }}
|
||||
as (
|
||||
{{ sql }}
|
||||
);
|
||||
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro bigquery_view_options(config, node) %}
|
||||
{% set opts = adapter.get_view_options(config, node) %}
|
||||
{%- do return(bigquery_options(opts)) -%}
|
||||
{%- endmacro -%}
|
||||
|
||||
{% macro bigquery__create_view_as(relation, sql) -%}
|
||||
{%- set sql_header = config.get('sql_header', none) -%}
|
||||
|
||||
{{ sql_header if sql_header is not none }}
|
||||
|
||||
create or replace view {{ relation }}
|
||||
{{ bigquery_view_options(config, model) }}
|
||||
as {{ sql }};
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__create_schema(relation) -%}
|
||||
{{ adapter.create_schema(relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__drop_schema(relation) -%}
|
||||
{{ adapter.drop_schema(relation) }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__drop_relation(relation) -%}
|
||||
{% call statement('drop_relation') -%}
|
||||
drop {{ relation.type }} if exists {{ relation }}
|
||||
{%- endcall %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__get_columns_in_relation(relation) -%}
|
||||
{{ return(adapter.get_columns_in_relation(relation)) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__list_relations_without_caching(schema_relation) -%}
|
||||
{{ return(adapter.list_relations_without_caching(schema_relation)) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__current_timestamp() -%}
|
||||
CURRENT_TIMESTAMP()
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__snapshot_string_as_time(timestamp) -%}
|
||||
{%- set result = 'TIMESTAMP("' ~ timestamp ~ '")' -%}
|
||||
{{ return(result) }}
|
||||
{%- endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__list_schemas(database) -%}
|
||||
{{ return(adapter.list_schemas(database)) }}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__check_schema_exists(information_schema, schema) %}
|
||||
{{ return(adapter.check_schema_exists(information_schema.database, schema)) }}
|
||||
{% endmacro %}
|
||||
|
||||
{#-- relation-level macro is not implemented. This is handled in the CTAs statement #}
|
||||
{% macro bigquery__persist_docs(relation, model, for_relation, for_columns) -%}
|
||||
{% if for_columns and config.persist_column_docs() and model.columns %}
|
||||
{% do alter_column_comment(relation, model.columns) %}
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__alter_column_comment(relation, column_dict) -%}
|
||||
{% do adapter.update_columns(relation, column_dict) %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__rename_relation(from_relation, to_relation) -%}
|
||||
{% do adapter.rename_relation(from_relation, to_relation) %}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__alter_relation_add_columns(relation, add_columns) %}
|
||||
|
||||
{% set sql -%}
|
||||
|
||||
alter {{ relation.type }} {{ relation }}
|
||||
{% for column in add_columns %}
|
||||
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
|
||||
{% endfor %}
|
||||
|
||||
{%- endset -%}
|
||||
|
||||
{{ return(run_query(sql)) }}
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
{% macro bigquery__alter_relation_drop_columns(relation, drop_columns) %}
|
||||
|
||||
{% set sql -%}
|
||||
|
||||
alter {{ relation.type }} {{ relation }}
|
||||
|
||||
{% for column in drop_columns %}
|
||||
drop column {{ column.name }}{{ ',' if not loop.last }}
|
||||
{% endfor %}
|
||||
|
||||
{%- endset -%}
|
||||
|
||||
{{ return(run_query(sql)) }}
|
||||
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro bigquery__alter_column_type(relation, column_name, new_column_type) -%}
|
||||
{#
|
||||
Changing a column's data type using a query requires you to scan the entire table.
|
||||
The query charges can be significant if the table is very large.
|
||||
|
||||
https://cloud.google.com/bigquery/docs/manually-changing-schemas#changing_a_columns_data_type
|
||||
#}
|
||||
{% set relation_columns = get_columns_in_relation(relation) %}
|
||||
|
||||
{% set sql %}
|
||||
select
|
||||
{%- for col in relation_columns -%}
|
||||
{% if col.column == column_name %}
|
||||
CAST({{ col.quoted }} AS {{ new_column_type }}) AS {{ col.quoted }}
|
||||
{%- else %}
|
||||
{{ col.quoted }}
|
||||
{%- endif %}
|
||||
{%- if not loop.last %},{% endif -%}
|
||||
{%- endfor %}
|
||||
from {{ relation }}
|
||||
{% endset %}
|
||||
|
||||
{% call statement('alter_column_type') %}
|
||||
{{ create_table_as(False, relation, sql)}}
|
||||
{%- endcall %}
|
||||
|
||||
{% endmacro %}
|
||||
@@ -1,209 +0,0 @@
|
||||
|
||||
{% macro bigquery__get_catalog(information_schema, schemas) -%}
|
||||
|
||||
{%- if (schemas | length) == 0 -%}
|
||||
{# Hopefully nothing cares about the columns we return when there are no rows #}
|
||||
{%- set query = "select 1 as id limit 0" -%}
|
||||
{%- else -%}
|
||||
|
||||
{%- set query -%}
|
||||
with tables as (
|
||||
select
|
||||
project_id as table_database,
|
||||
dataset_id as table_schema,
|
||||
table_id as original_table_name,
|
||||
|
||||
concat(project_id, '.', dataset_id, '.', table_id) as relation_id,
|
||||
|
||||
row_count,
|
||||
size_bytes as size_bytes,
|
||||
case
|
||||
when type = 1 then 'table'
|
||||
when type = 2 then 'view'
|
||||
else 'external'
|
||||
end as table_type,
|
||||
|
||||
REGEXP_CONTAINS(table_id, '^.+[0-9]{8}$') and coalesce(type, 0) = 1 as is_date_shard,
|
||||
REGEXP_EXTRACT(table_id, '^(.+)[0-9]{8}$') as shard_base_name,
|
||||
REGEXP_EXTRACT(table_id, '^.+([0-9]{8})$') as shard_name
|
||||
|
||||
from {{ information_schema.replace(information_schema_view='__TABLES__') }}
|
||||
where (
|
||||
{%- for schema in schemas -%}
|
||||
upper(dataset_id) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
|
||||
{%- endfor -%}
|
||||
)
|
||||
),
|
||||
|
||||
extracted as (
|
||||
|
||||
select *,
|
||||
case
|
||||
when is_date_shard then shard_base_name
|
||||
else original_table_name
|
||||
end as table_name
|
||||
|
||||
from tables
|
||||
|
||||
),
|
||||
|
||||
unsharded_tables as (
|
||||
|
||||
select
|
||||
table_database,
|
||||
table_schema,
|
||||
table_name,
|
||||
coalesce(table_type, 'external') as table_type,
|
||||
is_date_shard,
|
||||
|
||||
struct(
|
||||
min(shard_name) as shard_min,
|
||||
max(shard_name) as shard_max,
|
||||
count(*) as shard_count
|
||||
) as table_shards,
|
||||
|
||||
sum(size_bytes) as size_bytes,
|
||||
sum(row_count) as row_count,
|
||||
|
||||
max(relation_id) as relation_id
|
||||
|
||||
from extracted
|
||||
group by 1,2,3,4,5
|
||||
|
||||
),
|
||||
|
||||
info_schema_columns as (
|
||||
|
||||
select
|
||||
concat(table_catalog, '.', table_schema, '.', table_name) as relation_id,
|
||||
table_catalog as table_database,
|
||||
table_schema,
|
||||
table_name,
|
||||
|
||||
-- use the "real" column name from the paths query below
|
||||
column_name as base_column_name,
|
||||
ordinal_position as column_index,
|
||||
|
||||
is_partitioning_column,
|
||||
clustering_ordinal_position
|
||||
|
||||
from {{ information_schema.replace(information_schema_view='COLUMNS') }}
|
||||
where ordinal_position is not null
|
||||
|
||||
),
|
||||
|
||||
info_schema_column_paths as (
|
||||
|
||||
select
|
||||
concat(table_catalog, '.', table_schema, '.', table_name) as relation_id,
|
||||
field_path as column_name,
|
||||
data_type as column_type,
|
||||
column_name as base_column_name,
|
||||
description as column_comment
|
||||
|
||||
from {{ information_schema.replace(information_schema_view='COLUMN_FIELD_PATHS') }}
|
||||
|
||||
),
|
||||
|
||||
columns as (
|
||||
|
||||
select * except (base_column_name)
|
||||
from info_schema_columns
|
||||
join info_schema_column_paths using (relation_id, base_column_name)
|
||||
|
||||
),
|
||||
|
||||
column_stats as (
|
||||
|
||||
select
|
||||
table_database,
|
||||
table_schema,
|
||||
table_name,
|
||||
max(relation_id) as relation_id,
|
||||
max(case when is_partitioning_column = 'YES' then 1 else 0 end) = 1 as is_partitioned,
|
||||
max(case when is_partitioning_column = 'YES' then column_name else null end) as partition_column,
|
||||
max(case when clustering_ordinal_position is not null then 1 else 0 end) = 1 as is_clustered,
|
||||
array_to_string(
|
||||
array_agg(
|
||||
case
|
||||
when clustering_ordinal_position is not null then column_name
|
||||
else null
|
||||
end ignore nulls
|
||||
order by clustering_ordinal_position
|
||||
), ', '
|
||||
) as clustering_columns
|
||||
|
||||
from columns
|
||||
group by 1,2,3
|
||||
|
||||
)
|
||||
|
||||
select
|
||||
unsharded_tables.table_database,
|
||||
unsharded_tables.table_schema,
|
||||
case
|
||||
when is_date_shard then concat(unsharded_tables.table_name, '*')
|
||||
else unsharded_tables.table_name
|
||||
end as table_name,
|
||||
unsharded_tables.table_type,
|
||||
|
||||
-- coalesce name and type for External tables - these columns are not
|
||||
-- present in the COLUMN_FIELD_PATHS resultset
|
||||
coalesce(columns.column_name, '<unknown>') as column_name,
|
||||
-- invent a row number to account for nested fields -- BQ does
|
||||
-- not treat these nested properties as independent fields
|
||||
row_number() over (
|
||||
partition by relation_id
|
||||
order by columns.column_index, columns.column_name
|
||||
) as column_index,
|
||||
coalesce(columns.column_type, '<unknown>') as column_type,
|
||||
columns.column_comment,
|
||||
|
||||
'Shard count' as `stats__date_shards__label`,
|
||||
table_shards.shard_count as `stats__date_shards__value`,
|
||||
'The number of date shards in this table' as `stats__date_shards__description`,
|
||||
is_date_shard as `stats__date_shards__include`,
|
||||
|
||||
'Shard (min)' as `stats__date_shard_min__label`,
|
||||
table_shards.shard_min as `stats__date_shard_min__value`,
|
||||
'The first date shard in this table' as `stats__date_shard_min__description`,
|
||||
is_date_shard as `stats__date_shard_min__include`,
|
||||
|
||||
'Shard (max)' as `stats__date_shard_max__label`,
|
||||
table_shards.shard_max as `stats__date_shard_max__value`,
|
||||
'The last date shard in this table' as `stats__date_shard_max__description`,
|
||||
is_date_shard as `stats__date_shard_max__include`,
|
||||
|
||||
'# Rows' as `stats__num_rows__label`,
|
||||
row_count as `stats__num_rows__value`,
|
||||
'Approximate count of rows in this table' as `stats__num_rows__description`,
|
||||
(unsharded_tables.table_type = 'table') as `stats__num_rows__include`,
|
||||
|
||||
'Approximate Size' as `stats__num_bytes__label`,
|
||||
size_bytes as `stats__num_bytes__value`,
|
||||
'Approximate size of table as reported by BigQuery' as `stats__num_bytes__description`,
|
||||
(unsharded_tables.table_type = 'table') as `stats__num_bytes__include`,
|
||||
|
||||
'Partitioned By' as `stats__partitioning_type__label`,
|
||||
partition_column as `stats__partitioning_type__value`,
|
||||
'The partitioning column for this table' as `stats__partitioning_type__description`,
|
||||
is_partitioned as `stats__partitioning_type__include`,
|
||||
|
||||
'Clustered By' as `stats__clustering_fields__label`,
|
||||
clustering_columns as `stats__clustering_fields__value`,
|
||||
'The clustering columns for this table' as `stats__clustering_fields__description`,
|
||||
is_clustered as `stats__clustering_fields__include`
|
||||
|
||||
-- join using relation_id (an actual relation, not a shard prefix) to make
|
||||
-- sure that column metadata is picked up through the join. This will only
|
||||
-- return the column information for the "max" table in a date-sharded table set
|
||||
from unsharded_tables
|
||||
left join columns using (relation_id)
|
||||
left join column_stats using (relation_id)
|
||||
{%- endset -%}
|
||||
|
||||
{%- endif -%}
|
||||
|
||||
{{ return(run_query(query)) }}
|
||||
|
||||
{%- endmacro %}
|
||||
@@ -1,15 +0,0 @@
|
||||
{% macro date_sharded_table(base_name) %}
|
||||
{{ return(base_name ~ "[DBT__PARTITION_DATE]") }}
|
||||
{% endmacro %}
|
||||
|
||||
{% macro grant_access_to(entity, entity_type, role, grant_target_dict) -%}
|
||||
{% do adapter.grant_access_to(entity, entity_type, role, grant_target_dict) %}
|
||||
{% endmacro %}
|
||||
|
||||
{%- macro get_partitions_metadata(table) -%}
|
||||
{%- if execute -%}
|
||||
{%- set res = adapter.get_partitions_metadata(table) -%}
|
||||
{{- return(res) -}}
|
||||
{%- endif -%}
|
||||
{{- return(None) -}}
|
||||
{%- endmacro -%}
|
||||
@@ -1,32 +0,0 @@
|
||||
{% materialization copy, adapter='bigquery' -%}
|
||||
|
||||
{# Setup #}
|
||||
{{ run_hooks(pre_hooks) }}
|
||||
|
||||
{% set destination = this.incorporate(type='table') %}
|
||||
|
||||
{# there can be several ref() or source() according to BQ copy API docs #}
|
||||
{# cycle over ref() and source() to create source tables array #}
|
||||
{% set source_array = [] %}
|
||||
{% for ref_table in model.refs %}
|
||||
{{ source_array.append(ref(*ref_table)) }}
|
||||
{% endfor %}
|
||||
|
||||
{% for src_table in model.sources %}
|
||||
{{ source_array.append(source(*src_table)) }}
|
||||
{% endfor %}
|
||||
|
||||
{# Call adapter's copy_table function #}
|
||||
{%- set result_str = adapter.copy_table(
|
||||
source_array,
|
||||
destination,
|
||||
config.get('copy_materialization', default = 'table')) -%}
|
||||
|
||||
{{ store_result('main', response=result_str) }}
|
||||
|
||||
{# Clean up #}
|
||||
{{ run_hooks(post_hooks) }}
|
||||
{{ adapter.commit() }}
|
||||
|
||||
{{ return({'relations': [destination]}) }}
|
||||
{%- endmaterialization %}
|
||||
@@ -1,189 +0,0 @@
|
||||
|
||||
{% macro dbt_bigquery_validate_get_incremental_strategy(config) %}
|
||||
{#-- Find and validate the incremental strategy #}
|
||||
{%- set strategy = config.get("incremental_strategy", default="merge") -%}
|
||||
|
||||
{% set invalid_strategy_msg -%}
|
||||
Invalid incremental strategy provided: {{ strategy }}
|
||||
Expected one of: 'merge', 'insert_overwrite'
|
||||
{%- endset %}
|
||||
{% if strategy not in ['merge', 'insert_overwrite'] %}
|
||||
{% do exceptions.raise_compiler_error(invalid_strategy_msg) %}
|
||||
{% endif %}
|
||||
|
||||
{% do return(strategy) %}
|
||||
{% endmacro %}
|
||||
|
||||
|
||||
{% macro bq_insert_overwrite(
|
||||
tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists
|
||||
) %}
|
||||
|
||||
  {% if partitions is not none and partitions != [] %} {# static #}

      {% set predicate -%}
          {{ partition_by.render(alias='DBT_INTERNAL_DEST') }} in (
              {{ partitions | join (', ') }}
          )
      {%- endset %}

      {%- set source_sql -%}
        (
          {{sql}}
        )
      {%- endset -%}

      {{ get_insert_overwrite_merge_sql(target_relation, source_sql, dest_columns, [predicate], include_sql_header=true) }}

  {% else %} {# dynamic #}

      {% set predicate -%}
          {{ partition_by.render(alias='DBT_INTERNAL_DEST') }} in unnest(dbt_partitions_for_replacement)
      {%- endset %}

      {%- set source_sql -%}
      (
        select * from {{ tmp_relation }}
      )
      {%- endset -%}

      -- generated script to merge partitions into {{ target_relation }}
      declare dbt_partitions_for_replacement array<{{ partition_by.data_type }}>;
      declare _dbt_max_partition {{ partition_by.data_type }} default (
          select max({{ partition_by.field }}) from {{ this }}
          where {{ partition_by.field }} is not null
      );

      {# have we already created the temp table to check for schema changes? #}
      {% if not tmp_relation_exists %}
        -- 1. create a temp table
        {{ create_table_as(True, tmp_relation, sql) }}
      {% else %}
        -- 1. temp table already exists, we used it to check for schema changes
      {% endif %}

      -- 2. define partitions to update
      set (dbt_partitions_for_replacement) = (
          select as struct
              array_agg(distinct {{ partition_by.render() }})
          from {{ tmp_relation }}
      );

      {#
        TODO: include_sql_header is a hack; consider a better approach that includes
              the sql_header at the materialization-level instead
      #}
      -- 3. run the merge statement
      {{ get_insert_overwrite_merge_sql(target_relation, source_sql, dest_columns, [predicate], include_sql_header=false) }};

      -- 4. clean up the temp table
      drop table if exists {{ tmp_relation }}

  {% endif %}

{% endmacro %}


{% macro bq_generate_incremental_build_sql(
    strategy, tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists
) %}
  {#-- if partitioned, use BQ scripting to get the range of partition values to be updated --#}
  {% if strategy == 'insert_overwrite' %}

    {% set missing_partition_msg -%}
      The 'insert_overwrite' strategy requires the `partition_by` config.
    {%- endset %}
    {% if partition_by is none %}
      {% do exceptions.raise_compiler_error(missing_partition_msg) %}
    {% endif %}

    {% set build_sql = bq_insert_overwrite(
        tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, on_schema_change
    ) %}

  {% else %} {# strategy == 'merge' #}
    {%- set source_sql -%}
      {%- if tmp_relation_exists -%}
        (
          select * from {{ tmp_relation }}
        )
      {%- else -%} {#-- wrap sql in parens to make it a subquery --#}
        (
          {{sql}}
        )
      {%- endif -%}
    {%- endset -%}

    {% set build_sql = get_merge_sql(target_relation, source_sql, unique_key, dest_columns) %}

  {% endif %}

  {{ return(build_sql) }}

{% endmacro %}


{% materialization incremental, adapter='bigquery' -%}

  {%- set unique_key = config.get('unique_key') -%}
  {%- set full_refresh_mode = (should_full_refresh()) -%}

  {%- set target_relation = this %}
  {%- set existing_relation = load_relation(this) %}
  {%- set tmp_relation = make_temp_relation(this) %}

  {#-- Validate early so we don't run SQL if the strategy is invalid --#}
  {% set strategy = dbt_bigquery_validate_get_incremental_strategy(config) -%}

  {%- set raw_partition_by = config.get('partition_by', none) -%}
  {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%}
  {%- set partitions = config.get('partitions', none) -%}
  {%- set cluster_by = config.get('cluster_by', none) -%}

  {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}

  {{ run_hooks(pre_hooks) }}

  {% if existing_relation is none %}
      {% set build_sql = create_table_as(False, target_relation, sql) %}

  {% elif existing_relation.is_view %}
      {#-- There's no way to atomically replace a view with a table on BQ --#}
      {{ adapter.drop_relation(existing_relation) }}
      {% set build_sql = create_table_as(False, target_relation, sql) %}

  {% elif full_refresh_mode %}
      {#-- If the partition/cluster config has changed, then we must drop and recreate --#}
      {% if not adapter.is_replaceable(existing_relation, partition_by, cluster_by) %}
          {% do log("Hard refreshing " ~ existing_relation ~ " because it is not replaceable") %}
          {{ adapter.drop_relation(existing_relation) }}
      {% endif %}
      {% set build_sql = create_table_as(False, target_relation, sql) %}

  {% else %}
      {% set tmp_relation_exists = false %}
      {% if on_schema_change != 'ignore' %} {# Check first, since otherwise we may not build a temp table #}
          {% do run_query(create_table_as(True, tmp_relation, sql)) %}
          {% set tmp_relation_exists = true %}
          {% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
      {% endif %}

      {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}
      {% set build_sql = bq_generate_incremental_build_sql(
          strategy, tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists
      ) %}

  {% endif %}

  {%- call statement('main') -%}
    {{ build_sql }}
  {% endcall %}

  {{ run_hooks(post_hooks) }}

  {% set target_relation = this.incorporate(type='table') %}

  {% do persist_docs(target_relation, model) %}

  {{ return({'relations': [target_relation]}) }}

{%- endmaterialization %}
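Aside, not part of the diff: for a hypothetical model `analytics`.`events` partitioned by a `date` column named `date_day`, the dynamic branch of `bq_insert_overwrite` above renders a BigQuery script roughly shaped like the sketch below. All identifiers are illustrative assumptions, and the final merge statement approximates what `get_insert_overwrite_merge_sql` would produce.

-- illustrative sketch only; table and column names are hypothetical
declare dbt_partitions_for_replacement array<date>;
declare _dbt_max_partition date default (
    select max(date_day) from `analytics`.`events` where date_day is not null
);

-- 1. create a temp table with the model's new rows
create temporary table `events__dbt_tmp` as (
    select * from `analytics`.`events_source`
    where date_day >= coalesce(_dbt_max_partition, date '1970-01-01')
);

-- 2. define partitions to update
set (dbt_partitions_for_replacement) = (
    select as struct array_agg(distinct date_day) from `events__dbt_tmp`
);

-- 3. merge: delete the affected partitions in the target, then insert the new rows
merge into `analytics`.`events` as DBT_INTERNAL_DEST
using (select * from `events__dbt_tmp`) as DBT_INTERNAL_SOURCE
on FALSE
when not matched by source
     and DBT_INTERNAL_DEST.date_day in unnest(dbt_partitions_for_replacement)
    then delete
when not matched then insert row;

-- 4. clean up the temp table
drop table if exists `events__dbt_tmp`;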
@@ -1,19 +0,0 @@

{% macro bigquery__create_csv_table(model, agate_table) %}
    -- no-op
{% endmacro %}

{% macro bigquery__reset_csv_table(model, full_refresh, old_relation, agate_table) %}
    {{ adapter.drop_relation(old_relation) }}
{% endmacro %}

{% macro bigquery__load_csv_rows(model, agate_table) %}

    {%- set column_override = model['config'].get('column_types', {}) -%}
    {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'],
                              agate_table, column_override) }}
    {% if config.persist_relation_docs() and 'description' in model %}

      {{ adapter.update_table_description(model['database'], model['schema'], model['alias'], model['description']) }}
    {% endif %}
{% endmacro %}
@@ -1,15 +0,0 @@
{% macro bigquery__snapshot_hash_arguments(args) -%}
    to_hex(md5(concat({%- for arg in args -%}
        coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif -%}
    {%- endfor -%}
    )))
{%- endmacro %}

{% macro bigquery__create_columns(relation, columns) %}
  {{ adapter.alter_table_add_columns(relation, columns) }}
{% endmacro %}

{% macro bigquery__post_snapshot(staging_relation) %}
  -- Clean up the snapshot temp table
  {% do drop_relation(staging_relation) %}
{% endmacro %}
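For illustration only (not part of the diff): given two hypothetical snapshot check columns, `id` and `updated_at`, the hashing macro above renders an expression of this shape, joining the stringified values with a `'|'` separator before hashing.

-- illustrative rendering of bigquery__snapshot_hash_arguments(['id', 'updated_at'])
to_hex(md5(concat(
    coalesce(cast(id as string), ''), '|',
    coalesce(cast(updated_at as string), '')
)))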
@@ -1,84 +0,0 @@
{% macro make_date_partitioned_table(model, relation, dates, should_create, verbose=False) %}

  {% if should_create %}
      {{ adapter.make_date_partitioned_table(relation) }}
  {% endif %}

  {% for date in dates %}
    {% set date = (date | string) %}
    {% if verbose %}
        {% set table_start_time = modules.datetime.datetime.now().strftime("%H:%M:%S") %}
        {{ log(table_start_time ~ ' | -> Running for day ' ~ date, info=True) }}
    {% endif %}

    {% set fixed_sql = model['compiled_sql'] | replace('[DBT__PARTITION_DATE]', date) %}
    {% set _ = adapter.execute_model(model, 'table', fixed_sql, decorator=date) %}
  {% endfor %}

  {% set num_days = dates | length %}
  {% if num_days == 1 %}
      {% set result_str = 'CREATED 1 PARTITION' %}
  {% else %}
      {% set result_str = 'CREATED ' ~ num_days ~ ' PARTITIONS' %}
  {% endif %}

  {{ store_result('main', response=result_str) }}

{% endmacro %}

{% materialization table, adapter='bigquery' -%}

  {%- set identifier = model['alias'] -%}
  {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
  {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%}
  {%- set target_relation = api.Relation.create(database=database, schema=schema, identifier=identifier, type='table') -%}
  {%- set verbose = config.get('verbose', False) -%}

  {# partitions: iterate over each partition, running a separate query in a for-loop #}
  {%- set partitions = config.get('partitions') -%}

  {% if partitions %}
      {% if partitions is number or partitions is string %}
        {% set partitions = [(partitions | string)] %}
      {% endif %}

      {% if partitions is not iterable %}
        {{ exceptions.raise_compiler_error("Provided `partitions` configuration is not a list. Got: " ~ partitions, model) }}
      {% endif %}
  {% endif %}

  {{ run_hooks(pre_hooks) }}

  {#
      Since dbt uses WRITE_TRUNCATE mode for tables, we only need to drop this thing
      if it is not a table. If it _is_ already a table, then we can overwrite it without downtime
  #}
  {%- if exists_not_as_table -%}
      {{ adapter.drop_relation(old_relation) }}
  {%- endif -%}

  -- build model
  {% if partitions %}
    {# Create the dp-table if 1. it does not exist or 2. it existed, but we just dropped it #}
    {%- set should_create = (old_relation is none or exists_not_as_table) -%}
    {{ make_date_partitioned_table(model, target_relation, partitions, should_create, verbose) }}
  {% else %}
    {%- set raw_partition_by = config.get('partition_by', none) -%}
    {%- set partition_by = adapter.parse_partition_by(raw_partition_by) -%}
    {%- set cluster_by = config.get('cluster_by', none) -%}
    {% if not adapter.is_replaceable(old_relation, partition_by, cluster_by) %}
      {% do log("Hard refreshing " ~ old_relation ~ " because it is not replaceable") %}
      {% do adapter.drop_relation(old_relation) %}
    {% endif %}
    {% call statement('main') -%}
      {{ create_table_as(False, target_relation, sql) }}
    {% endcall -%}
  {% endif %}

  {{ run_hooks(post_hooks) }}

  {% do persist_docs(target_relation, model) %}

  {{ return({'relations': [target_relation]}) }}

{% endmaterialization %}
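A short illustration, not part of the diff: in the legacy date-partitioned flow above, the Jinja `replace` filter substitutes the literal `[DBT__PARTITION_DATE]` placeholder in the model's compiled SQL once per date, and `execute_model(..., decorator=date)` then writes each result to roughly the corresponding `<table>$<date>` partition decorator. With a hypothetical compiled query against date-sharded source tables, one loop iteration looks like this.

-- hypothetical compiled_sql for the model, containing the placeholder:
--   select * from `raw`.`events_[DBT__PARTITION_DATE]`
-- for the date string '20180101' the loop executes:
select * from `raw`.`events_20180101`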
@@ -1,25 +0,0 @@

{% macro bigquery__handle_existing_table(full_refresh, old_relation) %}
    {%- if full_refresh -%}
      {{ adapter.drop_relation(old_relation) }}
    {%- else -%}
      {{ exceptions.relation_wrong_type(old_relation, 'view') }}
    {%- endif -%}
{% endmacro %}


{% materialization view, adapter='bigquery' -%}
    {% set to_return = create_or_replace_view() %}

    {% set target_relation = this.incorporate(type='view') %}
    {% do persist_docs(target_relation, model) %}

    {% if config.get('grant_access_to') %}
      {% for grant_target_dict in config.get('grant_access_to') %}
        {% do adapter.grant_access_to(this, 'view', None, grant_target_dict) %}
      {% endfor %}
    {% endif %}

    {% do return(to_return) %}

{%- endmaterialization %}
@@ -1,26 +0,0 @@
default:
  outputs:

    dev:
      type: bigquery
      method: oauth
      project: [GCP project id]
      dataset: [the name of your dbt dataset] # You can also use "schema" here
      threads: [1 or more]
      timeout_seconds: 300
      location: US # Optional, one of US or EU
      priority: interactive
      retries: 1

    prod:
      type: bigquery
      method: service-account
      project: [GCP project id]
      dataset: [the name of your dbt dataset]
      threads: [1 or more]
      keyfile: [/path/to/bigquery/keyfile.json]
      timeout_seconds: 300
      priority: interactive
      retries: 1

  target: dev
@@ -1,73 +0,0 @@
#!/usr/bin/env python
import os
import sys

if sys.version_info < (3, 6):
    print('Error: dbt does not support this version of Python.')
    print('Please upgrade to Python 3.6 or higher.')
    sys.exit(1)


from setuptools import setup
try:
    from setuptools import find_namespace_packages
except ImportError:
    # the user has a downlevel version of setuptools.
    print('Error: dbt requires setuptools v40.1.0 or higher.')
    print('Please upgrade setuptools with "pip install --upgrade setuptools" '
          'and try again')
    sys.exit(1)


package_name = "dbt-bigquery"
package_version = "0.21.0b1"
description = """The bigquery adapter plugin for dbt (data build tool)"""

this_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(this_directory, 'README.md')) as f:
    long_description = f.read()

setup(
    name=package_name,
    version=package_version,
    description=description,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author="dbt Labs",
    author_email="info@dbtlabs.com",
    url="https://github.com/dbt-labs/dbt",
    packages=find_namespace_packages(include=['dbt', 'dbt.*']),
    package_data={
        'dbt': [
            'include/bigquery/dbt_project.yml',
            'include/bigquery/sample_profiles.yml',
            'include/bigquery/macros/*.sql',
            'include/bigquery/macros/**/*.sql',
        ]
    },
    install_requires=[
        'dbt-core=={}'.format(package_version),
        'protobuf>=3.13.0,<4',
        'google-cloud-core>=1.3.0,<2',
        'google-cloud-bigquery>=1.25.0,<3',
        'google-api-core>=1.16.0,<2',
        'googleapis-common-protos>=1.6.0,<2',
        'six>=1.14.0',
    ],
    zip_safe=False,
    classifiers=[
        'Development Status :: 5 - Production/Stable',

        'License :: OSI Approved :: Apache Software License',

        'Operating System :: Microsoft :: Windows',
        'Operating System :: MacOS :: MacOS X',
        'Operating System :: POSIX :: Linux',

        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
    ],
    python_requires=">=3.6.2",
)
@@ -1,32 +0,0 @@
<p align="center">
  <img src="https://raw.githubusercontent.com/dbt-labs/dbt/6c6649f9129d5d108aa3b0526f634cd8f3a9d1ed/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
</p>

**[dbt](https://www.getdbt.com/)** (data build tool) enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.

dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.

## dbt-postgres

The `dbt-postgres` package contains all of the code required to make dbt operate on a Postgres database. For
more information on using dbt with Postgres, consult [the docs](https://docs.getdbt.com/docs/profile-postgres).


## Find out more

- Check out the [Introduction to dbt](https://docs.getdbt.com/docs/introduction/).
- Read the [dbt Viewpoint](https://docs.getdbt.com/docs/about/viewpoint/).

## Join thousands of analysts in the dbt community

- Join the [chat](http://community.getdbt.com/) on Slack.
- Find community posts on [dbt Discourse](https://discourse.getdbt.com).

## Reporting bugs and contributing code

- Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt/issues/new).
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](https://github.com/dbt-labs/dbt/blob/HEAD/CONTRIBUTING.md)

## Code of Conduct

Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [dbt Code of Conduct](https://community.getdbt.com/code-of-conduct).
@@ -1,14 +0,0 @@
# these are mostly just exports, #noqa them so flake8 will be happy
from dbt.adapters.postgres.connections import PostgresConnectionManager  # noqa
from dbt.adapters.postgres.connections import PostgresCredentials
from dbt.adapters.postgres.relation import PostgresColumn  # noqa
from dbt.adapters.postgres.relation import PostgresRelation  # noqa: F401
from dbt.adapters.postgres.impl import PostgresAdapter

from dbt.adapters.base import AdapterPlugin
from dbt.include import postgres

Plugin = AdapterPlugin(
    adapter=PostgresAdapter,
    credentials=PostgresCredentials,
    include_path=postgres.PACKAGE_PATH)
@@ -1 +0,0 @@
version = '0.21.0b1'
@@ -1,186 +0,0 @@
from contextlib import contextmanager

import psycopg2

import dbt.exceptions
from dbt.adapters.base import Credentials
from dbt.adapters.sql import SQLConnectionManager
from dbt.contracts.connection import AdapterResponse
from dbt.logger import GLOBAL_LOGGER as logger

from dbt.helper_types import Port
from dataclasses import dataclass
from typing import Optional


@dataclass
class PostgresCredentials(Credentials):
    host: str
    user: str
    port: Port
    password: str  # on postgres the password is mandatory
    connect_timeout: int = 10
    role: Optional[str] = None
    search_path: Optional[str] = None
    keepalives_idle: int = 0  # 0 means to use the default value
    sslmode: Optional[str] = None
    sslcert: Optional[str] = None
    sslkey: Optional[str] = None
    sslrootcert: Optional[str] = None
    application_name: Optional[str] = 'dbt'

    _ALIASES = {
        'dbname': 'database',
        'pass': 'password'
    }

    @property
    def type(self):
        return 'postgres'

    @property
    def unique_field(self):
        return self.host

    def _connection_keys(self):
        return ('host', 'port', 'user', 'database', 'schema', 'search_path',
                'keepalives_idle', 'sslmode')


class PostgresConnectionManager(SQLConnectionManager):
    TYPE = 'postgres'

    @contextmanager
    def exception_handler(self, sql):
        try:
            yield

        except psycopg2.DatabaseError as e:
            logger.debug('Postgres error: {}'.format(str(e)))

            try:
                self.rollback_if_open()
            except psycopg2.Error:
                logger.debug("Failed to release connection!")
                pass

            raise dbt.exceptions.DatabaseException(str(e).strip()) from e

        except Exception as e:
            logger.debug("Error running SQL: {}", sql)
            logger.debug("Rolling back transaction.")
            self.rollback_if_open()
            if isinstance(e, dbt.exceptions.RuntimeException):
                # during a sql query, an internal to dbt exception was raised.
                # this sounds a lot like a signal handler and probably has
                # useful information, so raise it without modification.
                raise

            raise dbt.exceptions.RuntimeException(e) from e

    @classmethod
    def open(cls, connection):
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        credentials = cls.get_credentials(connection.credentials)
        kwargs = {}
        # we don't want to pass 0 along to connect() as postgres will try to
        # call an invalid setsockopt() call (contrary to the docs).
        if credentials.keepalives_idle:
            kwargs['keepalives_idle'] = credentials.keepalives_idle

        # psycopg2 doesn't support search_path officially,
        # see https://github.com/psycopg/psycopg2/issues/465
        search_path = credentials.search_path
        if search_path is not None and search_path != '':
            # see https://postgresql.org/docs/9.5/libpq-connect.html
            kwargs['options'] = '-c search_path={}'.format(
                search_path.replace(' ', '\\ '))

        if credentials.sslmode:
            kwargs['sslmode'] = credentials.sslmode

        if credentials.sslcert is not None:
            kwargs["sslcert"] = credentials.sslcert

        if credentials.sslkey is not None:
            kwargs["sslkey"] = credentials.sslkey

        if credentials.sslrootcert is not None:
            kwargs["sslrootcert"] = credentials.sslrootcert

        if credentials.application_name:
            kwargs['application_name'] = credentials.application_name

        try:
            handle = psycopg2.connect(
                dbname=credentials.database,
                user=credentials.user,
                host=credentials.host,
                password=credentials.password,
                port=credentials.port,
                connect_timeout=credentials.connect_timeout,
                **kwargs)

            if credentials.role:
                handle.cursor().execute('set role {}'.format(credentials.role))

            connection.handle = handle
            connection.state = 'open'
        except psycopg2.Error as e:
            logger.debug("Got an error when attempting to open a postgres "
                         "connection: '{}'"
                         .format(e))

            connection.handle = None
            connection.state = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        return connection

    def cancel(self, connection):
        connection_name = connection.name
        try:
            pid = connection.handle.get_backend_pid()
        except psycopg2.InterfaceError as exc:
            # if the connection is already closed, not much to cancel!
            if 'already closed' in str(exc):
                logger.debug(
                    f'Connection {connection_name} was already closed'
                )
                return
            # probably bad, re-raise it
            raise

        sql = "select pg_terminate_backend({})".format(pid)

        logger.debug("Cancelling query '{}' ({})".format(connection_name, pid))

        _, cursor = self.add_query(sql)
        res = cursor.fetchone()

        logger.debug("Cancel query '{}': {}".format(connection_name, res))

    @classmethod
    def get_credentials(cls, credentials):
        return credentials

    @classmethod
    def get_response(cls, cursor) -> AdapterResponse:
        message = str(cursor.statusmessage)
        rows = cursor.rowcount
        status_message_parts = message.split() if message is not None else []
        status_messsage_strings = [
            part
            for part in status_message_parts
            if not part.isdigit()
        ]
        code = ' '.join(status_messsage_strings)
        return AdapterResponse(
            _message=message,
            code=code,
            rows_affected=rows
        )
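Aside, not part of the diff: the `cancel` method above terminates the backend serving a running query with a standard Postgres admin function. For a hypothetical backend pid of 12345, the statement it builds and runs is simply:

select pg_terminate_backend(12345);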
@@ -1,142 +0,0 @@
from datetime import datetime
from dataclasses import dataclass
from typing import Optional, Set, List, Any
from dbt.adapters.base.meta import available
from dbt.adapters.base.impl import AdapterConfig
from dbt.adapters.sql import SQLAdapter
from dbt.adapters.postgres import PostgresConnectionManager
from dbt.adapters.postgres import PostgresColumn
from dbt.adapters.postgres import PostgresRelation
from dbt.dataclass_schema import dbtClassMixin, ValidationError
import dbt.exceptions
import dbt.utils


# note that this isn't an adapter macro, so just a single underscore
GET_RELATIONS_MACRO_NAME = 'postgres_get_relations'


@dataclass
class PostgresIndexConfig(dbtClassMixin):
    columns: List[str]
    unique: bool = False
    type: Optional[str] = None

    def render(self, relation):
        # We append the current timestamp to the index name because otherwise
        # the index will only be created on every other run. See
        # https://github.com/dbt-labs/dbt/issues/1945#issuecomment-576714925
        # for an explanation.
        now = datetime.utcnow().isoformat()
        inputs = (self.columns +
                  [relation.render(), str(self.unique), str(self.type), now])
        string = '_'.join(inputs)
        return dbt.utils.md5(string)

    @classmethod
    def parse(cls, raw_index) -> Optional['PostgresIndexConfig']:
        if raw_index is None:
            return None
        try:
            cls.validate(raw_index)
            return cls.from_dict(raw_index)
        except ValidationError as exc:
            msg = dbt.exceptions.validator_error_message(exc)
            dbt.exceptions.raise_compiler_error(
                f'Could not parse index config: {msg}'
            )
        except TypeError:
            dbt.exceptions.raise_compiler_error(
                f'Invalid index config:\n'
                f'  Got: {raw_index}\n'
                f'  Expected a dictionary with at minimum a "columns" key'
            )


@dataclass
class PostgresConfig(AdapterConfig):
    unlogged: Optional[bool] = None
    indexes: Optional[List[PostgresIndexConfig]] = None


class PostgresAdapter(SQLAdapter):
    Relation = PostgresRelation
    ConnectionManager = PostgresConnectionManager
    Column = PostgresColumn

    AdapterSpecificConfigs = PostgresConfig

    @classmethod
    def date_function(cls):
        return 'now()'

    @available
    def verify_database(self, database):
        if database.startswith('"'):
            database = database.strip('"')
        expected = self.config.credentials.database
        if database.lower() != expected.lower():
            raise dbt.exceptions.NotImplementedException(
                'Cross-db references not allowed in {} ({} vs {})'
                .format(self.type(), database, expected)
            )
        # return an empty string on success so macros can call this
        return ''

    @available
    def parse_index(self, raw_index: Any) -> Optional[PostgresIndexConfig]:
        return PostgresIndexConfig.parse(raw_index)

    def _link_cached_database_relations(self, schemas: Set[str]):
        """
        :param schemas: The set of schemas that should have links added.
        """
        database = self.config.credentials.database
        table = self.execute_macro(GET_RELATIONS_MACRO_NAME)

        for (dep_schema, dep_name, refed_schema, refed_name) in table:
            dependent = self.Relation.create(
                database=database,
                schema=dep_schema,
                identifier=dep_name
            )
            referenced = self.Relation.create(
                database=database,
                schema=refed_schema,
                identifier=refed_name
            )

            # don't record in cache if this relation isn't in a relevant
            # schema
            if refed_schema.lower() in schemas:
                self.cache.add_link(referenced, dependent)

    def _get_catalog_schemas(self, manifest):
        # postgres only allows one database (the main one)
        schemas = super()._get_catalog_schemas(manifest)
        try:
            return schemas.flatten()
        except dbt.exceptions.RuntimeException as exc:
            dbt.exceptions.raise_compiler_error(
                'Cross-db references not allowed in adapter {}: Got {}'.format(
                    self.type(), exc.msg
                )
            )

    def _link_cached_relations(self, manifest):
        schemas: Set[str] = set()
        relations_schemas = self._get_cache_schemas(manifest)
        for relation in relations_schemas:
            self.verify_database(relation.database)
            schemas.add(relation.schema.lower())

        self._link_cached_database_relations(schemas)

    def _relations_cache_for_schemas(self, manifest):
        super()._relations_cache_for_schemas(manifest)
        self._link_cached_relations(manifest)

    def timestamp_add_sql(
        self, add_to: str, number: int = 1, interval: str = 'hour'
    ) -> str:
        return f"{add_to} + interval '{number} {interval}'"
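For reference (not part of the diff): `timestamp_add_sql` above builds plain Postgres interval arithmetic rather than a dialect-specific dateadd function. For example, with the arguments `add_to='now()'`, `number=4`, `interval='day'`, the returned expression is used like this:

select now() + interval '4 day';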
@@ -1,29 +0,0 @@
from dbt.adapters.base import Column
from dataclasses import dataclass
from dbt.adapters.base.relation import BaseRelation
from dbt.exceptions import RuntimeException


@dataclass(frozen=True, eq=False, repr=False)
class PostgresRelation(BaseRelation):
    def __post_init__(self):
        # Check for length of Postgres table/view names.
        # Check self.type to exclude test relation identifiers
        if (self.identifier is not None and self.type is not None and
                len(self.identifier) > self.relation_max_name_length()):
            raise RuntimeException(
                f"Relation name '{self.identifier}' "
                f"is longer than {self.relation_max_name_length()} characters"
            )

    def relation_max_name_length(self):
        return 63


class PostgresColumn(Column):
    @property
    def data_type(self):
        # on postgres, do not convert 'text' to 'varchar()'
        if self.dtype.lower() == 'text':
            return self.dtype
        return super().data_type
@@ -1,2 +0,0 @@
import os
PACKAGE_PATH = os.path.dirname(__file__)
@@ -1,5 +0,0 @@
config-version: 2
name: dbt_postgres
version: 1.0

macro-paths: ["macros"]
@@ -1,188 +0,0 @@
{% macro postgres__create_table_as(temporary, relation, sql) -%}
  {%- set unlogged = config.get('unlogged', default=false) -%}
  {%- set sql_header = config.get('sql_header', none) -%}

  {{ sql_header if sql_header is not none }}

  create {% if temporary -%}
    temporary
  {%- elif unlogged -%}
    unlogged
  {%- endif %} table {{ relation }}
  as (
    {{ sql }}
  );
{%- endmacro %}

{% macro postgres__get_create_index_sql(relation, index_dict) -%}
  {%- set index_config = adapter.parse_index(index_dict) -%}
  {%- set comma_separated_columns = ", ".join(index_config.columns) -%}
  {%- set index_name = index_config.render(relation) -%}

  create {% if index_config.unique -%}
    unique
  {%- endif %} index if not exists
  "{{ index_name }}"
  on {{ relation }} {% if index_config.type -%}
    using {{ index_config.type }}
  {%- endif %}
  ({{ comma_separated_columns }});
{%- endmacro %}

{% macro postgres__create_schema(relation) -%}
  {% if relation.database -%}
    {{ adapter.verify_database(relation.database) }}
  {%- endif -%}
  {%- call statement('create_schema') -%}
    create schema if not exists {{ relation.without_identifier().include(database=False) }}
  {%- endcall -%}
{% endmacro %}

{% macro postgres__drop_schema(relation) -%}
  {% if relation.database -%}
    {{ adapter.verify_database(relation.database) }}
  {%- endif -%}
  {%- call statement('drop_schema') -%}
    drop schema if exists {{ relation.without_identifier().include(database=False) }} cascade
  {%- endcall -%}
{% endmacro %}

{% macro postgres__get_columns_in_relation(relation) -%}
  {% call statement('get_columns_in_relation', fetch_result=True) %}
      select
          column_name,
          data_type,
          character_maximum_length,
          numeric_precision,
          numeric_scale

      from {{ relation.information_schema('columns') }}
      where table_name = '{{ relation.identifier }}'
        {% if relation.schema %}
        and table_schema = '{{ relation.schema }}'
        {% endif %}
      order by ordinal_position

  {% endcall %}
  {% set table = load_result('get_columns_in_relation').table %}
  {{ return(sql_convert_columns_in_relation(table)) }}
{% endmacro %}


{% macro postgres__list_relations_without_caching(schema_relation) %}
  {% call statement('list_relations_without_caching', fetch_result=True) -%}
    select
      '{{ schema_relation.database }}' as database,
      tablename as name,
      schemaname as schema,
      'table' as type
    from pg_tables
    where schemaname ilike '{{ schema_relation.schema }}'
    union all
    select
      '{{ schema_relation.database }}' as database,
      viewname as name,
      schemaname as schema,
      'view' as type
    from pg_views
    where schemaname ilike '{{ schema_relation.schema }}'
  {% endcall %}
  {{ return(load_result('list_relations_without_caching').table) }}
{% endmacro %}

{% macro postgres__information_schema_name(database) -%}
  {% if database_name -%}
    {{ adapter.verify_database(database_name) }}
  {%- endif -%}
  information_schema
{%- endmacro %}

{% macro postgres__list_schemas(database) %}
  {% if database -%}
    {{ adapter.verify_database(database) }}
  {%- endif -%}
  {% call statement('list_schemas', fetch_result=True, auto_begin=False) %}
    select distinct nspname from pg_namespace
  {% endcall %}
  {{ return(load_result('list_schemas').table) }}
{% endmacro %}

{% macro postgres__check_schema_exists(information_schema, schema) -%}
  {% if information_schema.database -%}
    {{ adapter.verify_database(information_schema.database) }}
  {%- endif -%}
  {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %}
    select count(*) from pg_namespace where nspname = '{{ schema }}'
  {% endcall %}
  {{ return(load_result('check_schema_exists').table) }}
{% endmacro %}


{% macro postgres__current_timestamp() -%}
  now()
{%- endmacro %}

{% macro postgres__snapshot_string_as_time(timestamp) -%}
    {%- set result = "'" ~ timestamp ~ "'::timestamp without time zone" -%}
    {{ return(result) }}
{%- endmacro %}


{% macro postgres__snapshot_get_time() -%}
  {{ current_timestamp() }}::timestamp without time zone
{%- endmacro %}

{#
  Postgres tables have a maximum name length of 63 characters; anything longer is silently truncated.
  Temp relations add a lot of extra characters to the end of table names to ensure uniqueness.
  To prevent this going over the character limit, the base_relation name is truncated to ensure
  that name + suffix + uniquestring is < 63 characters.
#}
{% macro postgres__make_temp_relation(base_relation, suffix) %}
    {% set dt = modules.datetime.datetime.now() %}
    {% set dtstring = dt.strftime("%H%M%S%f") %}
    {% set suffix_length = suffix|length + dtstring|length %}
    {% set relation_max_name_length = 63 %}
    {% if suffix_length > relation_max_name_length %}
        {% do exceptions.raise_compiler_error('Temp relation suffix is too long (' ~ suffix|length ~ ' characters). Maximum length is ' ~ (relation_max_name_length - dtstring|length) ~ ' characters.') %}
    {% endif %}
    {% set tmp_identifier = base_relation.identifier[:relation_max_name_length - suffix_length] ~ suffix ~ dtstring %}
    {% do return(base_relation.incorporate(
        path={
            "identifier": tmp_identifier,
            "schema": none,
            "database": none
        })) -%}
{% endmacro %}


{#
  By using dollar-quoting like this, users can embed anything they want into their comments
  (including nested dollar-quoting), as long as they do not use this exact dollar-quoting
  label. It would be nice to just pick a new one but eventually you do have to give up.
#}
{% macro postgres_escape_comment(comment) -%}
  {% if comment is not string %}
    {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %}
  {% endif %}
  {%- set magic = '$dbt_comment_literal_block$' -%}
  {%- if magic in comment -%}
    {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%}
  {%- endif -%}
  {{ magic }}{{ comment }}{{ magic }}
{%- endmacro %}


{% macro postgres__alter_relation_comment(relation, comment) %}
  {% set escaped_comment = postgres_escape_comment(comment) %}
  comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }};
{% endmacro %}


{% macro postgres__alter_column_comment(relation, column_dict) %}
  {% for column_name in column_dict %}
    {% set comment = column_dict[column_name]['description'] %}
    {% set escaped_comment = postgres_escape_comment(comment) %}
    comment on column {{ relation }}.{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} is {{ escaped_comment }};
  {% endfor %}
{% endmacro %}
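Two short illustrations, not part of the diff, of what the macros above render. First, `postgres__get_create_index_sql` for a hypothetical unique btree index on `customer_id` for relation "analytics"."orders"; the quoted index name is the md5 hash computed by `PostgresIndexConfig.render`, shown here only as a placeholder. Second, `postgres__alter_relation_comment`, which wraps the description in the `$dbt_comment_literal_block$` dollar-quote label so ordinary quotes inside the text need no escaping.

-- hypothetical rendered index DDL; the quoted name stands in for the real md5 hash
create unique index if not exists
  "3f4c0b6d9e1a2c7f8b5d4e6a1c2b3d4e"
  on "analytics"."orders" using btree
  (customer_id);

-- hypothetical rendered relation comment
comment on table "analytics"."orders" is $dbt_comment_literal_block$Customer orders, one row per order. It's safe to use 'quotes' here.$dbt_comment_literal_block$;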
@@ -1,53 +0,0 @@

{% macro postgres__get_catalog(information_schema, schemas) -%}

  {%- call statement('catalog', fetch_result=True) -%}
    {#
      If the user has multiple databases set and the first one is wrong, this will fail.
      But we won't fail in the case where there are multiple quoting-difference-only dbs, which is better.
    #}
    {% set database = information_schema.database %}
    {{ adapter.verify_database(database) }}

    select
        '{{ database }}' as table_database,
        sch.nspname as table_schema,
        tbl.relname as table_name,
        case tbl.relkind
            when 'v' then 'VIEW'
            else 'BASE TABLE'
        end as table_type,
        tbl_desc.description as table_comment,
        col.attname as column_name,
        col.attnum as column_index,
        pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type,
        col_desc.description as column_comment,
        pg_get_userbyid(tbl.relowner) as table_owner

    from pg_catalog.pg_namespace sch
    join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid
    join pg_catalog.pg_attribute col on col.attrelid = tbl.oid
    left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0)
    left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum)

    where (
        {%- for schema in schemas -%}
          upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
        {%- endfor -%}
      )
      and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session
      and tbl.relpersistence = 'p' -- [p]ermanent table. Other values are [u]nlogged table, [t]emporary table
      and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view
      and col.attnum > 0 -- negative numbers are used for system columns such as oid
      and not col.attisdropped -- column has not been dropped

    order by
        sch.nspname,
        tbl.relname,
        col.attnum

  {%- endcall -%}

  {{ return(load_result('catalog').table) }}

{%- endmacro %}
@@ -1,18 +0,0 @@

{% macro postgres__snapshot_merge_sql(target, source, insert_cols) -%}
    {%- set insert_cols_csv = insert_cols | join(', ') -%}

    update {{ target }}
    set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
    from {{ source }} as DBT_INTERNAL_SOURCE
    where DBT_INTERNAL_SOURCE.dbt_scd_id::text = {{ target }}.dbt_scd_id::text
      and DBT_INTERNAL_SOURCE.dbt_change_type::text in ('update'::text, 'delete'::text)
      and {{ target }}.dbt_valid_to is null;

    insert into {{ target }} ({{ insert_cols_csv }})
    select {% for column in insert_cols -%}
        DBT_INTERNAL_SOURCE.{{ column }} {%- if not loop.last %}, {%- endif %}
    {%- endfor %}
    from {{ source }} as DBT_INTERNAL_SOURCE
    where DBT_INTERNAL_SOURCE.dbt_change_type::text = 'insert'::text;
{% endmacro %}
@@ -1,76 +0,0 @@
{% macro postgres_get_relations () -%}

  {#
      -- in pg_depend, objid is the dependent, refobjid is the referenced object
      --  > a pg_depend entry indicates that the referenced object cannot be
      --  > dropped without also dropping the dependent object.
  #}

  {%- call statement('relations', fetch_result=True) -%}
    with relation as (
        select
            pg_rewrite.ev_class as class,
            pg_rewrite.oid as id
        from pg_rewrite
    ),
    class as (
        select
            oid as id,
            relname as name,
            relnamespace as schema,
            relkind as kind
        from pg_class
    ),
    dependency as (
        select
            pg_depend.objid as id,
            pg_depend.refobjid as ref
        from pg_depend
    ),
    schema as (
        select
            pg_namespace.oid as id,
            pg_namespace.nspname as name
        from pg_namespace
        where nspname != 'information_schema' and nspname not like 'pg\_%'
    ),
    referenced as (
        select
            relation.id AS id,
            referenced_class.name,
            referenced_class.schema,
            referenced_class.kind
        from relation
        join class as referenced_class on relation.class=referenced_class.id
        where referenced_class.kind in ('r', 'v')
    ),
    relationships as (
        select
            referenced.name as referenced_name,
            referenced.schema as referenced_schema_id,
            dependent_class.name as dependent_name,
            dependent_class.schema as dependent_schema_id,
            referenced.kind as kind
        from referenced
        join dependency on referenced.id=dependency.id
        join class as dependent_class on dependency.ref=dependent_class.id
        where
            (referenced.name != dependent_class.name or
             referenced.schema != dependent_class.schema)
    )

    select
        referenced_schema.name as referenced_schema,
        relationships.referenced_name as referenced_name,
        dependent_schema.name as dependent_schema,
        relationships.dependent_name as dependent_name
    from relationships
    join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id
    join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id
    group by referenced_schema, referenced_name, dependent_schema, dependent_name
    order by referenced_schema, referenced_name, dependent_schema, dependent_name;

  {%- endcall -%}

  {{ return(load_result('relations').table) }}
{% endmacro %}
@@ -1,24 +0,0 @@
default:
  outputs:

    dev:
      type: postgres
      threads: [1 or more]
      host: [host]
      port: [port]
      user: [dev_username]
      pass: [dev_password]
      dbname: [dbname]
      schema: [dev_schema]

    prod:
      type: postgres
      threads: [1 or more]
      host: [host]
      port: [port]
      user: [prod_username]
      pass: [prod_password]
      dbname: [dbname]
      schema: [prod_schema]

  target: dev
Some files were not shown because too many files have changed in this diff.