Compare commits


1 Commit

Author SHA1 Message Date
Michelle Ark 1552eccb05 update index.html for dbt docs 2024-05-22 17:47:27 -04:00
344 changed files with 3335 additions and 17854 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.10.0a1
current_version = 1.9.0a1
parse = (?P<major>[\d]+) # major version number
\.(?P<minor>[\d]+) # minor version number
\.(?P<patch>[\d]+) # patch version number

View File

@@ -0,0 +1,6 @@
kind: Dependencies
body: Remove logbook dependency
time: 2024-05-09T09:37:17.745129-05:00
custom:
Author: emmyoop
Issue: "8027"

View File

@@ -0,0 +1,6 @@
kind: Docs
body: Fix rendering docs with saved queries
time: 2024-05-22T17:47:13.414938-04:00
custom:
Author: ChenyuLInx michelleark
Issue: "10168"

View File

@@ -0,0 +1,6 @@
kind: Features
body: serialize inferred primary key
time: 2024-05-06T17:56:42.757673-05:00
custom:
Author: dave-connors-3
Issue: "9824"

View File

@@ -0,0 +1,6 @@
kind: Features
body: 'Add unit_test: selection method'
time: 2024-05-07T16:27:17.047585-04:00
custom:
Author: michelleark
Issue: "10053"

View File

@@ -1,6 +0,0 @@
kind: Features
body: Add new hard_deletes="new_record" mode for snapshots.
time: 2024-11-04T12:00:53.95191-05:00
custom:
Author: peterallenwebb
Issue: "10235"

View File

@@ -1,6 +0,0 @@
kind: Features
body: Add `batch` context object to model jinja context
time: 2024-11-21T12:56:30.715473-06:00
custom:
Author: QMalcolm
Issue: "11025"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Remove unused check_new method
time: 2023-06-01T20:41:57.556342+02:00
custom:
Author: kevinneville
Issue: "7586"

View File

@@ -0,0 +1,7 @@
kind: Fixes
body: 'Restore previous behavior for --favor-state: only favor defer_relation if not
selected in current command'
time: 2024-05-08T15:11:27.510912+02:00
custom:
Author: jtcohen6
Issue: "10107"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Unit test fixture (csv) returns null for empty value
time: 2024-05-09T09:14:11.772709-04:00
custom:
Author: michelleark
Issue: "9881"

View File

@@ -0,0 +1,7 @@
kind: Fixes
body: Fix json format log and --quiet for ls and jinja print by converting print call
to fire events
time: 2024-05-16T15:39:13.896723-07:00
custom:
Author: ChenyuLInx
Issue: "8756"

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Add resource type to saved_query
time: 2024-05-16T22:35:10.287514-07:00
custom:
Author: ChenyuLInx
Issue: "10168"

View File

@@ -1,6 +0,0 @@
kind: Fixes
body: dbt retry does not respect --threads
time: 2024-08-22T12:21:32.358066+05:30
custom:
Author: donjin-master
Issue: "10584"

View File

@@ -1,6 +0,0 @@
kind: Fixes
body: Catch DbtRuntimeError for hooks
time: 2024-11-21T18:17:39.753235Z
custom:
Author: aranke
Issue: "11012"

View File

@@ -0,0 +1,6 @@
kind: Security
body: Explicitly bind to localhost in docs serve
time: 2024-05-22T09:45:40.748185-04:00
custom:
Author: ChenyuLInx michelleark
Issue: "10209"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Clear error message for Private package in dbt-core
time: 2024-05-02T15:44:30.713097-07:00
custom:
Author: ChenyuLInx
Issue: "10083"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Enable use of context in serialization
time: 2024-05-06T14:55:11.1812-04:00
custom:
Author: gshank
Issue: "10093"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Make RSS high water mark measurement more accurate on Linux
time: 2024-05-19T15:59:46.700842315-04:00
custom:
Author: peterallenwebb
Issue: "10177"

View File

@@ -7,7 +7,6 @@ ignore =
W503 # makes Flake8 work like black
W504
E203 # makes Flake8 work like black
E704 # makes Flake8 work like black
E741
E501 # long line checking is done in black
exclude = test/

View File

@@ -1,18 +0,0 @@
name: 📄 Code docs
description: Report an issue for markdown files within this repo, such as README, ARCHITECTURE, etc.
title: "[Code docs] <title>"
labels: ["triage"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this code docs issue!
- type: textarea
attributes:
label: Please describe the issue and your proposals.
description: |
Links? References? Anything that will give us more context about the issue you are encountering!
Tip: You can attach images by clicking this area to highlight it and then dragging files in.
validations:
required: false

View File

@@ -1,8 +1,5 @@
blank_issues_enabled: false
contact_links:
- name: Documentation
url: https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose
about: Problems and issues with dbt product documentation hosted on docs.getdbt.com. Issues for markdown files within this repo, such as README, should be opened using the "Code docs" template.
- name: Ask the community for help
url: https://github.com/dbt-labs/docs.getdbt.com/discussions
about: Need help troubleshooting? Check out our guide on how to ask

View File

@@ -5,15 +5,6 @@ runs:
steps:
- shell: bash
run: |
sudo apt-get --purge remove postgresql postgresql-*
sudo apt update -y
sudo apt install gnupg2 wget vim -y
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc|sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg
sudo apt update -y
sudo apt install postgresql-16
sudo apt-get -y install postgresql postgresql-contrib
sudo systemctl start postgresql
sudo systemctl enable postgresql
sudo systemctl start postgresql.service
pg_isready
sudo -u postgres bash ${{ github.action_path }}/setup_db.sh

View File

@@ -5,9 +5,7 @@ runs:
steps:
- shell: bash
run: |
brew install postgresql@16
brew link postgresql@16 --force
brew services start postgresql@16
brew services start postgresql
echo "Check PostgreSQL service is running"
i=10
COMMAND='pg_isready'

View File

@@ -5,22 +5,8 @@ runs:
steps:
- shell: pwsh
run: |
Write-Host -Object "Installing PostgreSQL 16 as windows service..."
$installerArgs = @("--install_runtimes 0", "--superpassword root", "--enable_acledit 1", "--unattendedmodeui none", "--mode unattended")
$filePath = Invoke-DownloadWithRetry -Url "https://get.enterprisedb.com/postgresql/postgresql-16.1-1-windows-x64.exe" -Path "$env:PGROOT/postgresql-16.1-1-windows-x64.exe"
Start-Process -FilePath $filePath -ArgumentList $installerArgs -Wait -PassThru
Write-Host -Object "Validating PostgreSQL 16 Install..."
Get-Service -Name postgresql*
$pgReady = Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
$exitCode = $pgReady.ExitCode
if ($exitCode -ne 0) {
Write-Host -Object "PostgreSQL is not ready. Exitcode: $exitCode"
exit $exitCode
}
Write-Host -Object "Starting PostgreSQL 16 Service..."
$pgService = Get-Service -Name postgresql-x64-16
$pgService = Get-Service -Name postgresql*
Set-Service -InputObject $pgService -Status running -StartupType automatic
Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
$env:Path += ";$env:PGBIN"
bash ${{ github.action_path }}/setup_db.sh

View File

@@ -1,7 +1,7 @@
Resolves #
resolves #
<!---
Include the number of the issue addressed by this PR above, if applicable.
Include the number of the issue addressed by this PR above if applicable.
PRs for code changes without an associated issue *will not be merged*.
See CONTRIBUTING.md for more information.
@@ -26,8 +26,8 @@ Resolves #
### Checklist
- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me.
- [ ] I have run this code in development, and it appears to resolve the stated issue.
- [ ] This PR includes tests, or tests are not required or relevant for this PR.
- [ ] This PR has no interface changes (e.g., macros, CLI, logs, JSON artifacts, config files, adapter interface, etc.) or this PR has already received feedback and approval from Product or DX.
- [ ] This PR includes [type annotations](https://docs.python.org/3/library/typing.html) for new and modified functions.
- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me
- [ ] I have run this code in development and it appears to resolve the stated issue
- [ ] This PR includes tests, or tests are not required/relevant for this PR
- [ ] This PR has no interface changes (e.g. macros, cli, logs, json artifacts, config files, adapter interface, etc) or this PR has already received feedback and approval from Product or DX
- [ ] This PR includes [type annotations](https://docs.python.org/3/library/typing.html) for new and modified functions

View File

@@ -32,7 +32,7 @@ jobs:
run: |
echo "CI failure: Artifact changes checked in core/dbt/artifacts directory."
echo "Files changed: ${{ steps.check_artifact_changes.outputs.artifacts_changed_files }}"
echo "To bypass this check, confirm that the change is not breaking (https://github.com/dbt-labs/dbt-core/blob/main/core/dbt/artifacts/README.md#breaking-changes) and add the 'artifact_minor_upgrade' label to the PR. Modifications and additions to all fields require updates to https://github.com/dbt-labs/dbt-jsonschema."
echo "To bypass this check, confirm that the change is not breaking (https://github.com/dbt-labs/dbt-core/blob/main/core/dbt/artifacts/README.md#breaking-changes) and add the 'artifact_minor_upgrade' label to the PR."
exit 1
- name: CI check passed

View File

@@ -36,6 +36,6 @@ jobs:
uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
with:
issue_repository: "dbt-labs/docs.getdbt.com"
issue_title: "[Core] Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated.\n Originating from this issue: https://github.com/dbt-labs/dbt-core/issues/${{ github.event.issue.number }}"
issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
secrets: inherit

View File

@@ -52,14 +52,13 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
python-version: '3.8'
- name: Install python dependencies
run: |
python -m pip install --user --upgrade pip
python -m pip --version
make dev
make dev_req
mypy --version
dbt --version
@@ -75,7 +74,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
env:
TOXENV: "unit"
@@ -140,7 +139,7 @@ jobs:
- name: generate include
id: generate-include
run: |
INCLUDE=('"python-version":"3.9","os":"windows-latest"' '"python-version":"3.9","os":"macos-14"' )
INCLUDE=('"python-version":"3.8","os":"windows-latest"' '"python-version":"3.8","os":"macos-12"' )
INCLUDE_GROUPS="["
for include in ${INCLUDE[@]}; do
for group in $(seq 1 ${{ env.PYTHON_INTEGRATION_TEST_WORKERS }}); do
@@ -162,7 +161,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
os: [ubuntu-20.04]
split-group: ${{ fromJson(needs.integration-metadata.outputs.split-groups) }}
include: ${{ fromJson(needs.integration-metadata.outputs.include) }}
@@ -264,7 +263,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
python-version: '3.8'
- name: Install python dependencies
run: |

View File

@@ -150,7 +150,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: "3.8"
- name: Install dbt
run: pip install dbt-postgres==${{ needs.set-variables.outputs.release_id }}

View File

@@ -247,24 +247,3 @@ jobs:
secrets:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_CORE_ALERTS }}
testing-slack-notification:
# sends notifications to #slackbot-test
name: Testing - Slack Notification
if: ${{ failure() && inputs.test_run && !inputs.nightly_release }}
needs:
[
bump-version-generate-changelog,
build-test-package,
github-release,
pypi-release,
docker-release,
]
uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main
with:
status: "failure"
secrets:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_TESTING_WEBHOOK_URL }}

View File

@@ -30,14 +30,14 @@ env:
jobs:
checking-schemas:
name: "Post-merge schema changes required"
name: "Checking schemas"
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.9
python-version: 3.8
- name: Checkout dbt repo
uses: actions/checkout@v4

View File

@@ -76,7 +76,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: "3.8"
- name: Install python dependencies
run: |

View File

@@ -27,6 +27,7 @@ on:
description: 'Version of Python to Test Against'
type: choice
options:
- '3.8'
- '3.9'
- '3.10'
- '3.11'
@@ -35,7 +36,7 @@ on:
type: choice
options:
- 'ubuntu-latest'
- 'macos-14'
- 'macos-12'
- 'windows-latest'
num_runs_per_batch:
description: 'Max number of times to run the test per batch. We always run 10 batches.'
@@ -100,7 +101,7 @@ jobs:
# mac and windows don't use make due to limitations with docker on those runners in GitHub
- name: "Set up postgres (macos)"
if: inputs.os == 'macos-14'
if: inputs.os == 'macos-12'
uses: ./.github/actions/setup-postgres-macos
- name: "Set up postgres (windows)"

.gitignore
View File

@@ -57,9 +57,6 @@ test.env
makefile.test.env
*.pytest_cache/
# Unit test artifacts
index.html
# Translations
*.mo
@@ -108,6 +105,3 @@ venv/
# poetry
poetry.lock
# asdf
.tool-versions

View File

@@ -1,4 +1,4 @@
[settings]
profile=black
extend_skip_glob=.github/*,third-party-stubs/*,scripts/*
known_first_party=dbt,dbt_adapters,dbt_common,dbt_extractor,dbt_semantic_interfaces
known_first_party=dbt,dbt_adapters,dbt_common,dbt_extractor,dbt_semantic_interface

View File

@@ -3,7 +3,7 @@
exclude: ^(core/dbt/docs/build/|core/dbt/common/events/types_pb2.py|core/dbt/events/core_types_pb2.py|core/dbt/adapters/events/adapter_types_pb2.py)
# Force all unspecified python hooks to run python 3.9
# Force all unspecified python hooks to run python 3.8
default_language_version:
python: python3
@@ -15,19 +15,16 @@ repos:
args: [--unsafe]
- id: check-json
- id: end-of-file-fixer
exclude: schemas/dbt/manifest/
- id: trailing-whitespace
exclude_types:
- "markdown"
- id: check-case-conflict
- repo: https://github.com/pycqa/isort
# rev must match what's in dev-requirements.txt
rev: 5.13.2
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/psf/black
# rev must match what's in dev-requirements.txt
rev: 24.3.0
rev: 22.3.0
hooks:
- id: black
- id: black
@@ -37,7 +34,6 @@ repos:
- "--check"
- "--diff"
- repo: https://github.com/pycqa/flake8
# rev must match what's in dev-requirements.txt
rev: 4.0.1
hooks:
- id: flake8
@@ -45,7 +41,6 @@ repos:
alias: flake8-check
stages: [manual]
- repo: https://github.com/pre-commit/mirrors-mypy
# rev must match what's in dev-requirements.txt
rev: v1.4.1
hooks:
- id: mypy

View File

@@ -10,7 +10,6 @@
For information on prior major and minor releases, see their changelogs:
* [1.9](https://github.com/dbt-labs/dbt-core/blob/1.9.latest/CHANGELOG.md)
* [1.8](https://github.com/dbt-labs/dbt-core/blob/1.8.latest/CHANGELOG.md)
* [1.7](https://github.com/dbt-labs/dbt-core/blob/1.7.latest/CHANGELOG.md)
* [1.6](https://github.com/dbt-labs/dbt-core/blob/1.6.latest/CHANGELOG.md)

View File

@@ -170,9 +170,9 @@ Finally, you can also run a specific test or group of tests using [`pytest`](htt
```sh
# run all unit tests in a file
python3 -m pytest tests/unit/test_invocation_id.py
python3 -m pytest tests/unit/test_base_column.py
# run a specific unit test
python3 -m pytest tests/unit/test_invocation_id.py::TestInvocationId::test_invocation_id
python3 -m pytest tests/unit/test_base_column.py::TestNumericType::test__numeric_type
# run specific Postgres functional tests
python3 -m pytest tests/functional/sources
```

View File

@@ -33,6 +33,9 @@ RUN apt-get update \
python-is-python3 \
python-dev-is-python3 \
python3-pip \
python3.8 \
python3.8-dev \
python3.8-venv \
python3.9 \
python3.9-dev \
python3.9-venv \

View File

@@ -144,7 +144,3 @@ help: ## Show this help message.
@echo
@echo 'options:'
@echo 'use USE_DOCKER=true to run target in a docker container'
.PHONY: json_schema
json_schema: ## Update generated JSON schema using code changes.
scripts/collect-artifact-schema.py --path schemas

View File

@@ -1,7 +1,6 @@
ignore:
- ".github"
- ".changes"
coverage:
status:
project:
@@ -12,28 +11,3 @@ coverage:
default:
target: auto
threshold: 80%
comment:
layout: "header, diff, flags, components" # show component info in the PR comment
component_management:
default_rules: # default rules that will be inherited by all components
statuses:
- type: project # in this case every component that doesn't have a status defined will have a project type one
target: auto
threshold: 0.1%
- type: patch
target: 80%
individual_components:
- component_id: unittests
name: "Unit Tests"
flag_regexes:
- "unit"
statuses:
- type: patch
target: 80%
threshold: 5%
- component_id: integrationtests
name: "Integration Tests"
flag_regexes:
- "integration"

View File

@@ -29,10 +29,6 @@ All existing resources are defined under `dbt/artifacts/resources/v1`.
## Making changes to dbt/artifacts
### All changes
All changes to any fields will require a manual update to [dbt-jsonschema](https://github.com/dbt-labs/dbt-jsonschema) to ensure live checking continues to work.
### Non-breaking changes
Freely make incremental, non-breaking changes in-place to the latest major version of any artifact (minor or patch bumps). The only changes that are fully forward and backward compatible are:
@@ -46,9 +42,9 @@ These types of minor, non-breaking changes are tested by [tests/unit/artifacts/t
#### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
Note: this must be done AFTER the core pull request is merged; otherwise we may end up with unresolvable conflicts and schemas that are invalid prior to the base pull request merge. You may create the schemas.getdbt.com pull request before merging the base pull request, but do not merge it until afterward.
1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
3. Observe the `Artifact Schema Check` CI check pass on the `dbt-core` PR that updates the artifact schemas, and merge the `dbt-core` PR!
Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, `jsonschema` validation should still continue to work once the schemas are updated because they are forward-compatible and can therefore be used to validate previous minor versions of the schema.
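
For illustration, here is a minimal sketch of the kind of in-place, non-breaking change described above: adding an optional field with a default to an artifact resource dataclass. The `ExampleResource` class and its fields are hypothetical and not part of dbt/artifacts; only `dbtClassMixin` is taken from the code shown elsewhere in this diff.

```python
from dataclasses import dataclass, field
from typing import List, Optional

from dbt_common.dataclass_schema import dbtClassMixin


@dataclass
class ExampleResource(dbtClassMixin):
    # Existing required field: removing or re-typing it would be a breaking change.
    name: str
    # Non-breaking addition: optional with a default, so artifacts written by
    # older versions still deserialize, and older consumers can ignore it.
    description: Optional[str] = None
    tags: List[str] = field(default_factory=list)
```

Because the new field is optional with a default, previously published schemas remain forward-compatible; the corresponding jsonschema in dbt-jsonschema and on schemas.getdbt.com would still need the in-place update described in the steps above.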

View File

@@ -38,7 +38,6 @@ from dbt.artifacts.resources.v1.macro import Macro, MacroArgument, MacroDependsO
from dbt.artifacts.resources.v1.metric import (
ConstantPropertyInput,
ConversionTypeParams,
CumulativeTypeParams,
Metric,
MetricConfig,
MetricInput,
@@ -46,7 +45,7 @@ from dbt.artifacts.resources.v1.metric import (
MetricTimeWindow,
MetricTypeParams,
)
from dbt.artifacts.resources.v1.model import Model, ModelConfig, TimeSpine
from dbt.artifacts.resources.v1.model import Model, ModelConfig
from dbt.artifacts.resources.v1.owner import Owner
from dbt.artifacts.resources.v1.saved_query import (
Export,

View File

@@ -68,10 +68,3 @@ class TimePeriod(StrEnum):
def plural(self) -> str:
return str(self) + "s"
class BatchSize(StrEnum):
hour = "hour"
day = "day"
month = "month"
year = "year"

View File

@@ -10,7 +10,6 @@ from dbt_common.contracts.config.properties import AdditionalPropertiesMixin
from dbt_common.contracts.constraints import ColumnLevelConstraint
from dbt_common.contracts.util import Mergeable
from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, dbtClassMixin
from dbt_semantic_interfaces.type_enums import TimeGranularity
NodeVersion = Union[str, float]
@@ -67,7 +66,6 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
quote: Optional[bool] = None
tags: List[str] = field(default_factory=list)
_extra: Dict[str, Any] = field(default_factory=dict)
granularity: Optional[TimeGranularity] = None
@dataclass
@@ -194,7 +192,6 @@ class ParsedResource(ParsedResourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
raw_code: str = ""
@@ -202,8 +199,6 @@ class ParsedResource(ParsedResourceMandatory):
dct = super().__post_serialize__(dct, context)
if context and context.get("artifact") and "config_call_dict" in dct:
del dct["config_call_dict"]
if context and context.get("artifact") and "unrendered_config_call_dict" in dct:
del dct["unrendered_config_call_dict"]
return dct

View File

@@ -80,9 +80,6 @@ class NodeConfig(NodeAndTestConfig):
# 'mergebehavior' dictionary
materialized: str = "view"
incremental_strategy: Optional[str] = None
batch_size: Any = None
lookback: Any = 1
begin: Any = None
persist_docs: Dict[str, Any] = field(default_factory=dict)
post_hook: List[Hook] = field(
default_factory=list,
@@ -125,8 +122,6 @@ class NodeConfig(NodeAndTestConfig):
default_factory=ContractConfig,
metadata=MergeBehavior.Update.meta(),
)
event_time: Any = None
concurrent_batches: Any = None
def __post_init__(self):
# we validate that node_color has a suitable value to prevent dbt-docs from crashing

View File

@@ -2,6 +2,13 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
MetricType,
TimeGranularity,
)
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
@@ -11,13 +18,6 @@ from dbt.artifacts.resources.v1.semantic_layer_components import (
)
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
MetricType,
PeriodAggregation,
TimeGranularity,
)
"""
The following classes are dataclasses which are used to construct the Metric
@@ -80,13 +80,6 @@ class ConversionTypeParams(dbtClassMixin):
constant_properties: Optional[List[ConstantPropertyInput]] = None
@dataclass
class CumulativeTypeParams(dbtClassMixin):
window: Optional[MetricTimeWindow] = None
grain_to_date: Optional[TimeGranularity] = None
period_agg: PeriodAggregation = PeriodAggregation.FIRST
@dataclass
class MetricTypeParams(dbtClassMixin):
measure: Optional[MetricInputMeasure] = None
@@ -98,7 +91,6 @@ class MetricTypeParams(dbtClassMixin):
grain_to_date: Optional[TimeGranularity] = None
metrics: Optional[List[MetricInput]] = None
conversion_type_params: Optional[ConversionTypeParams] = None
cumulative_type_params: Optional[CumulativeTypeParams] = None
@dataclass
@@ -121,7 +113,6 @@ class Metric(GraphResource):
type_params: MetricTypeParams
filter: Optional[WhereFilterIntersection] = None
metadata: Optional[SourceFileMetadata] = None
time_granularity: Optional[TimeGranularity] = None
resource_type: Literal[NodeType.Metric]
meta: Dict[str, Any] = field(default_factory=dict, metadata=MergeBehavior.Update.meta())
tags: List[str] = field(default_factory=list)

View File

@@ -11,7 +11,6 @@ from dbt.artifacts.resources.v1.components import (
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.contracts.config.base import MergeBehavior
from dbt_common.contracts.constraints import ModelLevelConstraint
from dbt_common.dataclass_schema import dbtClassMixin
@dataclass
@@ -22,18 +21,6 @@ class ModelConfig(NodeConfig):
)
@dataclass
class CustomGranularity(dbtClassMixin):
name: str
column_name: Optional[str] = None
@dataclass
class TimeSpine(dbtClassMixin):
standard_granularity_column: str
custom_granularities: List[CustomGranularity] = field(default_factory=list)
@dataclass
class Model(CompiledResource):
resource_type: Literal[NodeType.Model]
@@ -45,7 +32,6 @@ class Model(CompiledResource):
deprecation_date: Optional[datetime] = None
defer_relation: Optional[DeferRelation] = None
primary_key: List[str] = field(default_factory=list)
time_spine: Optional[TimeSpine] = None
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)

View File

@@ -4,6 +4,10 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Literal, Optional
from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
@@ -13,9 +17,6 @@ from dbt.artifacts.resources.v1.semantic_layer_components import (
)
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.type_enums.export_destination_type import (
ExportDestinationType,
)
@dataclass
@@ -34,7 +35,6 @@ class Export(dbtClassMixin):
name: str
config: ExportConfig
unrendered_config: Dict[str, str] = field(default_factory=dict)
@dataclass
@@ -44,8 +44,6 @@ class QueryParams(dbtClassMixin):
metrics: List[str]
group_by: List[str]
where: Optional[WhereFilterIntersection]
order_by: List[str] = field(default_factory=list)
limit: Optional[int] = None
@dataclass

View File

@@ -1,12 +1,13 @@
from dataclasses import dataclass
from typing import List, Sequence, Tuple
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
from dbt_semantic_interfaces.parsing.where_filter.where_filter_parser import (
WhereFilterParser,
)
from dbt_common.dataclass_schema import dbtClassMixin
@dataclass
class WhereFilter(dbtClassMixin):

View File

@@ -2,11 +2,6 @@ import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence
from dbt.artifacts.resources import SourceFileMetadata
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
@@ -22,6 +17,12 @@ from dbt_semantic_interfaces.type_enums import (
TimeGranularity,
)
from dbt.artifacts.resources import SourceFileMetadata
from dbt.artifacts.resources.base import GraphResource
from dbt.artifacts.resources.v1.components import DependsOn, RefArgs
from dbt_common.contracts.config.base import BaseConfig, CompareBehavior, MergeBehavior
from dbt_common.dataclass_schema import dbtClassMixin
"""
The classes in this file are dataclasses which are used to construct the Semantic
Model node in dbt-core. Additionally, these classes need to at a minimum support

View File

@@ -1,74 +1,56 @@
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Dict, List, Literal, Optional, Union
from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.dataclass_schema import ValidationError, dbtClassMixin
@dataclass
class SnapshotMetaColumnNames(dbtClassMixin):
dbt_valid_to: Optional[str] = None
dbt_valid_from: Optional[str] = None
dbt_scd_id: Optional[str] = None
dbt_updated_at: Optional[str] = None
dbt_is_deleted: Optional[str] = None
from dbt_common.dataclass_schema import ValidationError
@dataclass
class SnapshotConfig(NodeConfig):
materialized: str = "snapshot"
strategy: Optional[str] = None
unique_key: Optional[Union[str, List[str]]] = None
unique_key: Optional[str] = None
target_schema: Optional[str] = None
target_database: Optional[str] = None
updated_at: Optional[str] = None
# Not using Optional because of serialization issues with a Union of str and List[str]
check_cols: Union[str, List[str], None] = None
snapshot_meta_column_names: SnapshotMetaColumnNames = field(
default_factory=SnapshotMetaColumnNames
)
dbt_valid_to_current: Optional[str] = None
@property
def snapshot_table_column_names(self):
return {
"dbt_valid_from": self.snapshot_meta_column_names.dbt_valid_from or "dbt_valid_from",
"dbt_valid_to": self.snapshot_meta_column_names.dbt_valid_to or "dbt_valid_to",
"dbt_scd_id": self.snapshot_meta_column_names.dbt_scd_id or "dbt_scd_id",
"dbt_updated_at": self.snapshot_meta_column_names.dbt_updated_at or "dbt_updated_at",
"dbt_is_deleted": self.snapshot_meta_column_names.dbt_is_deleted or "dbt_is_deleted",
}
def final_validate(self):
if not self.strategy or not self.unique_key:
@classmethod
def validate(cls, data):
super().validate(data)
# Note: currently you can't just set these keys in schema.yml because this validation
# will fail when parsing the snapshot node.
if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"):
raise ValidationError(
"Snapshots must be configured with a 'strategy' and 'unique_key'."
"Snapshots must be configured with a 'strategy', 'unique_key', "
"and 'target_schema'."
)
if self.strategy == "check":
if not self.check_cols:
if data.get("strategy") == "check":
if not data.get("check_cols"):
raise ValidationError(
"A snapshot configured with the check strategy must "
"specify a check_cols configuration."
)
if isinstance(self.check_cols, str) and self.check_cols != "all":
if isinstance(data["check_cols"], str) and data["check_cols"] != "all":
raise ValidationError(
f"Invalid value for 'check_cols': {self.check_cols}. "
f"Invalid value for 'check_cols': {data['check_cols']}. "
"Expected 'all' or a list of strings."
)
elif self.strategy == "timestamp":
if not self.updated_at:
elif data.get("strategy") == "timestamp":
if not data.get("updated_at"):
raise ValidationError(
"A snapshot configured with the timestamp strategy "
"must specify an updated_at configuration."
)
if self.check_cols:
if data.get("check_cols"):
raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'")
# If the strategy is not 'check' or 'timestamp' it's a custom strategy,
# formerly supported with GenericSnapshotConfig
if self.materialized and self.materialized != "snapshot":
if data.get("materialized") and data.get("materialized") != "snapshot":
raise ValidationError("A snapshot must have a materialized value of 'snapshot'")
# Called by "calculate_node_config_dict" in ContextConfigGenerator

View File

@@ -19,7 +19,6 @@ from dbt_common.exceptions import CompilationError
@dataclass
class SourceConfig(BaseConfig):
enabled: bool = True
event_time: Any = None
@dataclass
@@ -71,5 +70,3 @@ class SourceDefinition(ParsedSourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
created_at: float = field(default_factory=lambda: time.time())
unrendered_database: Optional[str] = None
unrendered_schema: Optional[str] = None

View File

@@ -20,7 +20,6 @@ class UnitTestConfig(BaseConfig):
default_factory=dict,
metadata=MergeBehavior.Update.meta(),
)
enabled: bool = True
class UnitTestFormat(StrEnum):

View File

@@ -77,11 +77,8 @@ class BaseArtifactMetadata(dbtClassMixin):
# remote-compile-result
# remote-execution-result
# remote-run-result
S = TypeVar("S", bound="VersionedSchema")
def schema_version(name: str, version: int):
def inner(cls: Type[S]):
def inner(cls: Type[VersionedSchema]):
cls.dbt_schema_version = SchemaVersion(
name=name,
version=version,

View File

@@ -1,24 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Tuple
from dbt_common.dataclass_schema import dbtClassMixin
BatchType = Tuple[datetime, datetime]
@dataclass
class BatchResults(dbtClassMixin):
successful: List[BatchType] = field(default_factory=list)
failed: List[BatchType] = field(default_factory=list)
def __add__(self, other: BatchResults) -> BatchResults:
return BatchResults(
successful=self.successful + other.successful,
failed=self.failed + other.failed,
)
def __len__(self):
return len(self.successful) + len(self.failed)

View File

@@ -1,11 +1,2 @@
# alias to latest
from dbt.artifacts.schemas.catalog.v1.catalog import * # noqa
from dbt_common.contracts.metadata import (
CatalogKey,
CatalogTable,
ColumnMap,
ColumnMetadata,
StatsDict,
StatsItem,
TableMetadata,
)

View File

@@ -1,18 +1,71 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, NamedTuple, Optional, Union
from dbt.artifacts.schemas.base import (
ArtifactMixin,
BaseArtifactMetadata,
schema_version,
)
from dbt_common.contracts.metadata import CatalogTable
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.utils.formatting import lowercase
Primitive = Union[bool, str, float, None]
PrimitiveDict = Dict[str, Primitive]
CatalogKey = NamedTuple(
"CatalogKey", [("database", Optional[str]), ("schema", str), ("name", str)]
)
@dataclass
class StatsItem(dbtClassMixin):
id: str
label: str
value: Primitive
include: bool
description: Optional[str] = None
StatsDict = Dict[str, StatsItem]
@dataclass
class ColumnMetadata(dbtClassMixin):
type: str
index: int
name: str
comment: Optional[str] = None
ColumnMap = Dict[str, ColumnMetadata]
@dataclass
class TableMetadata(dbtClassMixin):
type: str
schema: str
name: str
database: Optional[str] = None
comment: Optional[str] = None
owner: Optional[str] = None
@dataclass
class CatalogTable(dbtClassMixin):
metadata: TableMetadata
columns: ColumnMap
stats: StatsDict
# the same table with two unique IDs will just be listed two times
unique_id: Optional[str] = None
def key(self) -> CatalogKey:
return CatalogKey(
lowercase(self.metadata.database),
self.metadata.schema.lower(),
self.metadata.name.lower(),
)
@dataclass
class CatalogMetadata(BaseArtifactMetadata):

View File

@@ -10,12 +10,6 @@ from dbt_common.utils import cast_to_int, cast_to_str
@dataclass
class TimingInfo(dbtClassMixin):
"""
Represents a step in the execution of a node.
`name` should be one of: compile, execute, or other
Do not call directly, use `collect_timing_info` instead.
"""
name: str
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
@@ -27,7 +21,7 @@ class TimingInfo(dbtClassMixin):
self.completed_at = datetime.utcnow()
def to_msg_dict(self):
msg_dict = {"name": str(self.name)}
msg_dict = {"name": self.name}
if self.started_at:
msg_dict["started_at"] = datetime_to_json_string(self.started_at)
if self.completed_at:
@@ -61,7 +55,6 @@ class NodeStatus(StrEnum):
Fail = "fail"
Warn = "warn"
Skipped = "skipped"
PartialSuccess = "partial success"
Pass = "pass"
RuntimeErr = "runtime error"
@@ -70,7 +63,6 @@ class RunStatus(StrEnum):
Success = NodeStatus.Success
Error = NodeStatus.Error
Skipped = NodeStatus.Skipped
PartialSuccess = NodeStatus.PartialSuccess
class TestStatus(StrEnum):

View File

@@ -1,5 +1,3 @@
from __future__ import annotations
import copy
import threading
from dataclasses import dataclass, field
@@ -19,7 +17,6 @@ from dbt.artifacts.schemas.base import (
get_artifact_schema_version,
schema_version,
)
from dbt.artifacts.schemas.batch_results import BatchResults
from dbt.artifacts.schemas.results import (
BaseResult,
ExecutionResult,
@@ -37,7 +34,6 @@ class RunResult(NodeResult):
agate_table: Optional["agate.Table"] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
batch_results: Optional[BatchResults] = None
@property
def skipped(self):
@@ -55,7 +51,6 @@ class RunResult(NodeResult):
node=node,
adapter_response={},
failures=None,
batch_results=None,
)
@@ -72,7 +67,6 @@ class RunResultOutput(BaseResult):
compiled: Optional[bool]
compiled_code: Optional[str]
relation_name: Optional[str]
batch_results: Optional[BatchResults] = None
def process_run_result(result: RunResult) -> RunResultOutput:
@@ -88,7 +82,6 @@ def process_run_result(result: RunResult) -> RunResultOutput:
message=result.message,
adapter_response=result.adapter_response,
failures=result.failures,
batch_results=result.batch_results,
compiled=result.node.compiled if compiled else None, # type:ignore
compiled_code=result.node.compiled_code if compiled else None, # type:ignore
relation_name=result.node.relation_name if compiled else None, # type:ignore
@@ -165,8 +158,7 @@ class RunResultsArtifact(ExecutionResult, ArtifactMixin):
@classmethod
def upgrade_schema_version(cls, data):
"""This overrides the "upgrade_schema_version" call in VersionedSchema (via
ArtifactMixin) to modify the dictionary passed in from earlier versions of the run_results.
"""
ArtifactMixin) to modify the dictionary passed in from earlier versions of the run_results."""
run_results_schema_version = get_artifact_schema_version(data)
# If less than the current version (v5), preprocess contents to match latest schema version
if run_results_schema_version <= 5:

View File

@@ -1,10 +1,7 @@
from typing import IO, List, Optional, Union
from typing import IO, Optional
from click.exceptions import ClickException
from dbt.artifacts.schemas.catalog import CatalogArtifact
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.results import RunExecutionResult
from dbt.utils import ExitCodes
@@ -26,7 +23,7 @@ class CliException(ClickException):
# the typing of _file is to satisfy the signature of ClickException.show
# overriding this method prevents click from printing any exceptions to stdout
def show(self, _file: Optional[IO] = None) -> None: # type: ignore[type-arg]
def show(self, _file: Optional[IO] = None) -> None:
pass
@@ -34,17 +31,7 @@ class ResultExit(CliException):
"""This class wraps any exception that contains results while invoking dbt, or the
results of an invocation that did not succeed but did not throw any exceptions."""
def __init__(
self,
result: Union[
bool, # debug
CatalogArtifact, # docs generate
List[str], # list/ls
Manifest, # parse
None, # clean, deps, init, source
RunExecutionResult, # build, compile, run, seed, snapshot, test, run-operation
] = None,
) -> None:
def __init__(self, result) -> None:
super().__init__(ExitCodes.ModelError)
self.result = result

View File

@@ -1,7 +1,6 @@
import os
import sys
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from pathlib import Path
from pprint import pformat as pf
@@ -16,7 +15,7 @@ from dbt.cli.resolvers import default_log_path, default_project_dir
from dbt.cli.types import Command as CliCommand
from dbt.config.project import read_project_flags
from dbt.contracts.project import ProjectFlags
from dbt.deprecations import fire_buffered_deprecations, renamed_env_var
from dbt.deprecations import renamed_env_var
from dbt.events import ALL_EVENT_NAMES
from dbt_common import ui
from dbt_common.clients import jinja
@@ -38,7 +37,6 @@ FLAGS_DEFAULTS = {
"STRICT_MODE": False,
"STORE_FAILURES": False,
"INTROSPECT": True,
"STATE_MODIFIED_COMPARE_VARS": False,
}
DEPRECATED_PARAMS = {
@@ -59,7 +57,6 @@ def convert_config(config_name, config_value):
ret = WarnErrorOptions(
include=config_value.get("include", []),
exclude=config_value.get("exclude", []),
silence=config_value.get("silence", []),
valid_error_names=ALL_EVENT_NAMES,
)
return ret
@@ -94,8 +91,6 @@ class Flags:
# Set the default flags.
for key, value in FLAGS_DEFAULTS.items():
object.__setattr__(self, key, value)
# Use to handle duplicate params in _assign_params
flags_defaults_list = list(FLAGS_DEFAULTS.keys())
if ctx is None:
ctx = get_current_context()
@@ -177,29 +172,13 @@ class Flags:
old_name=dep_param.envvar,
new_name=new_param.envvar,
)
# end deprecated_params
# Set the flag value.
is_duplicate = (
hasattr(self, param_name.upper())
and param_name.upper() not in flags_defaults_list
)
# First time through, set as though FLAGS_DEFAULTS hasn't been set, so not a duplicate.
# Subsequent pass (to process "parent" params) should be treated as duplicates.
if param_name.upper() in flags_defaults_list:
flags_defaults_list.remove(param_name.upper())
# Note: the following determines whether parameter came from click default,
# not from FLAGS_DEFAULTS in __init__.
is_duplicate = hasattr(self, param_name.upper())
is_default = ctx.get_parameter_source(param_name) == ParameterSource.DEFAULT
is_envvar = ctx.get_parameter_source(param_name) == ParameterSource.ENVIRONMENT
flag_name = (new_name or param_name).upper()
# envvar flags are assigned in either parent or child context if there
# isn't an overriding cli command flag.
# If the flag has been encountered as a child cli flag, we don't
# want to overwrite with parent envvar, since the commandline flag takes precedence.
if (is_duplicate and not (is_default or is_envvar)) or not is_duplicate:
if (is_duplicate and not is_default) or not is_duplicate:
object.__setattr__(self, flag_name, param_value)
# Track default assigned params.
@@ -310,13 +289,6 @@ class Flags:
params_assigned_from_default, ["WARN_ERROR", "WARN_ERROR_OPTIONS"]
)
# Handle arguments mutually exclusive with INLINE
self._assert_mutually_exclusive(params_assigned_from_default, ["SELECT", "INLINE"])
self._assert_mutually_exclusive(params_assigned_from_default, ["SELECTOR", "INLINE"])
# Check event_time configs for validity
self._validate_event_time_configs()
# Support lower cased access for legacy code.
params = set(
x for x in dir(self) if not callable(getattr(self, x)) and not x.startswith("__")
@@ -343,9 +315,7 @@ class Flags:
"""
set_flag = None
for flag in group:
flag_set_by_user = (
hasattr(self, flag) and flag.lower() not in params_assigned_from_default
)
flag_set_by_user = flag.lower() not in params_assigned_from_default
if flag_set_by_user and set_flag:
raise DbtUsageException(
f"{flag.lower()}: not allowed with argument {set_flag.lower()}"
@@ -353,36 +323,6 @@ class Flags:
elif flag_set_by_user:
set_flag = flag
def _validate_event_time_configs(self) -> None:
event_time_start: datetime = (
getattr(self, "EVENT_TIME_START") if hasattr(self, "EVENT_TIME_START") else None
)
event_time_end: datetime = (
getattr(self, "EVENT_TIME_END") if hasattr(self, "EVENT_TIME_END") else None
)
# only do validations if at least one of `event_time_start` or `event_time_end` are specified
if event_time_start is not None or event_time_end is not None:
# These `ifs`, combined with the parent `if` make it so that `event_time_start` and
# `event_time_end` are mutually required
if event_time_start is None:
raise DbtUsageException(
"The flag `--event-time-end` was specified, but `--event-time-start` was not. "
"When specifying `--event-time-end`, `--event-time-start` must also be present."
)
if event_time_end is None:
raise DbtUsageException(
"The flag `--event-time-start` was specified, but `--event-time-end` was not. "
"When specifying `--event-time-start`, `--event-time-end` must also be present."
)
# This `if` just is a sanity check that `event_time_start` is before `event_time_end`
if event_time_start >= event_time_end:
raise DbtUsageException(
"Value for `--event-time-start` must be less than `--event-time-end`"
)
def fire_deprecations(self):
"""Fires events for deprecated env_var usage."""
[dep_fn() for dep_fn in self.deprecated_env_var_warnings]
@@ -390,8 +330,6 @@ class Flags:
# not get pickled when written to disk as json.
object.__delattr__(self, "deprecated_env_var_warnings")
fire_buffered_deprecations()
@classmethod
def from_dict(cls, command: CliCommand, args_dict: Dict[str, Any]) -> "Flags":
command_arg_list = command_params(command, args_dict)

View File

@@ -8,15 +8,12 @@ from click.exceptions import BadOptionUsage
from click.exceptions import Exit as ClickExit
from click.exceptions import NoSuchOption, UsageError
from dbt.adapters.factory import register_adapter
from dbt.artifacts.schemas.catalog import CatalogArtifact
from dbt.artifacts.schemas.run import RunExecutionResult
from dbt.cli import params as p
from dbt.cli import requires
from dbt.cli.exceptions import DbtInternalException, DbtUsageException
from dbt.cli.requires import setup_manifest
from dbt.contracts.graph.manifest import Manifest
from dbt.mp_context import get_mp_context
from dbt_common.events.base_types import EventMsg
@@ -168,8 +165,6 @@ def cli(ctx, **kwargs):
@click.pass_context
@global_flags
@p.empty
@p.event_time_start
@p.event_time_end
@p.exclude
@p.export_saved_queries
@p.full_refresh
@@ -223,7 +218,8 @@ def clean(ctx, **kwargs):
"""Delete all folders in the clean-targets list (usually the dbt_packages and target directories.)"""
from dbt.task.clean import CleanTask
with CleanTask(ctx.obj["flags"], ctx.obj["project"]) as task:
task = CleanTask(ctx.obj["flags"], ctx.obj["project"])
results = task.run()
success = task.interpret_results(results)
return results, success
@@ -278,7 +274,6 @@ def docs_generate(ctx, **kwargs):
@click.pass_context
@global_flags
@p.browser
@p.host
@p.port
@p.profiles_dir
@p.project_dir
@@ -357,7 +352,6 @@ def compile(ctx, **kwargs):
@p.select
@p.selector
@p.inline
@p.inline_direct
@p.target_path
@p.threads
@p.vars
@@ -366,21 +360,12 @@ def compile(ctx, **kwargs):
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest
def show(ctx, **kwargs):
"""Generates executable SQL for a named resource or inline query, runs that SQL, and returns a preview of the
results. Does not materialize anything to the warehouse."""
from dbt.task.show import ShowTask, ShowTaskDirect
from dbt.task.show import ShowTask
if ctx.obj["flags"].inline_direct:
# Issue the inline query directly, with no templating. Does not require
# loading the manifest.
register_adapter(ctx.obj["runtime_config"], get_mp_context())
task = ShowTaskDirect(
ctx.obj["flags"],
ctx.obj["runtime_config"],
)
else:
setup_manifest(ctx)
task = ShowTask(
ctx.obj["flags"],
ctx.obj["runtime_config"],
@@ -451,7 +436,7 @@ def deps(ctx, **kwargs):
message=f"Version is required in --add-package when a package when source is {flags.SOURCE}",
option_name="--add-package",
)
with DepsTask(flags, ctx.obj["project"]) as task:
task = DepsTask(flags, ctx.obj["project"])
results = task.run()
success = task.interpret_results(results)
return results, success
@@ -473,7 +458,8 @@ def init(ctx, **kwargs):
"""Initialize a new dbt project."""
from dbt.task.init import InitTask
with InitTask(ctx.obj["flags"]) as task:
task = InitTask(ctx.obj["flags"])
results = task.run()
success = task.interpret_results(results)
return results, success
@@ -552,8 +538,6 @@ def parse(ctx, **kwargs):
@p.profiles_dir
@p.project_dir
@p.empty
@p.event_time_start
@p.event_time_end
@p.select
@p.selector
@p.target_path
@@ -716,7 +700,6 @@ def seed(ctx, **kwargs):
@cli.command("snapshot")
@click.pass_context
@global_flags
@p.empty
@p.exclude
@p.profiles_dir
@p.project_dir
@@ -799,8 +782,6 @@ cli.commands["source"].add_command(snapshot_freshness, "snapshot-freshness") #
@click.pass_context
@global_flags
@p.exclude
@p.resource_type
@p.exclude_resource_type
@p.profiles_dir
@p.project_dir
@p.select

View File

@@ -1,6 +1,6 @@
from click import Choice, ParamType
from dbt.config.utils import normalize_warn_error_options, parse_cli_yaml_string
from dbt.config.utils import exclusive_primary_alt_value_setting, parse_cli_yaml_string
from dbt.events import ALL_EVENT_NAMES
from dbt.exceptions import OptionNotYamlDictError, ValidationError
from dbt_common.exceptions import DbtValidationError
@@ -51,7 +51,12 @@ class WarnErrorOptionsType(YAML):
def convert(self, value, param, ctx):
# this function is being used by param in click
include_exclude = super().convert(value, param, ctx)
normalize_warn_error_options(include_exclude)
exclusive_primary_alt_value_setting(
include_exclude, "include", "error", "warn_error_options"
)
exclusive_primary_alt_value_setting(
include_exclude, "exclude", "warn", "warn_error_options"
)
return WarnErrorOptions(
include=include_exclude.get("include", []),

View File

@@ -91,22 +91,6 @@ empty = click.option(
is_flag=True,
)
event_time_end = click.option(
"--event-time-end",
envvar="DBT_EVENT_TIME_END",
help="If specified, the end datetime dbt uses to filter microbatch model inputs (exclusive).",
type=click.DateTime(),
default=None,
)
event_time_start = click.option(
"--event-time-start",
envvar="DBT_EVENT_TIME_START",
help="If specified, the start datetime dbt uses to filter microbatch model inputs (inclusive).",
type=click.DateTime(),
default=None,
)
exclude = click.option(
"--exclude",
envvar=None,
@@ -151,14 +135,6 @@ full_refresh = click.option(
is_flag=True,
)
host = click.option(
"--host",
envvar="DBT_HOST",
help="host to serve dbt docs on",
type=click.STRING,
default="127.0.0.1",
)
indirect_selection = click.option(
"--indirect-selection",
envvar="DBT_INDIRECT_SELECTION",
@@ -487,13 +463,6 @@ inline = click.option(
help="Pass SQL inline to dbt compile and show",
)
inline_direct = click.option(
"--inline-direct",
envvar=None,
help="Internal flag to pass SQL inline to dbt show. Do not load the entire project or apply templating.",
hidden=True,
)
# `--select` and `--models` are analogous for most commands except `dbt list` for legacy reasons.
# Most CLI arguments should use the combined `select` option that aliases `--models` to `--select`.
# However, if you need to split out these separators (like `dbt ls`), use the `models` and `raw_select` options instead.

View File

@@ -41,13 +41,7 @@ from dbt_common.events.functions import LOG_VERSION, fire_event
from dbt_common.events.helpers import get_json_string_utcnow
from dbt_common.exceptions import DbtBaseException as DbtException
from dbt_common.invocation import reset_invocation_id
from dbt_common.record import (
Recorder,
RecorderMode,
get_record_mode_from_env,
get_record_types_from_dict,
get_record_types_from_env,
)
from dbt_common.record import Recorder, RecorderMode, get_record_mode_from_env
from dbt_common.utils import cast_dict_to_dict_of_strings
@@ -107,23 +101,13 @@ def preflight(func):
def setup_record_replay():
rec_mode = get_record_mode_from_env()
rec_types = get_record_types_from_env()
recorder: Optional[Recorder] = None
if rec_mode == RecorderMode.REPLAY:
previous_recording_path = os.environ.get("DBT_RECORDER_FILE_PATH")
recorder = Recorder(
RecorderMode.REPLAY, types=rec_types, previous_recording_path=previous_recording_path
)
elif rec_mode == RecorderMode.DIFF:
previous_recording_path = os.environ.get("DBT_RECORDER_FILE_PATH")
# ensure types match the previous recording
types = get_record_types_from_dict(previous_recording_path)
recorder = Recorder(
RecorderMode.DIFF, types=types, previous_recording_path=previous_recording_path
)
recording_path = os.environ["DBT_REPLAY"]
recorder = Recorder(RecorderMode.REPLAY, recording_path)
elif rec_mode == RecorderMode.RECORD:
recorder = Recorder(RecorderMode.RECORD, types=rec_types)
recorder = Recorder(RecorderMode.RECORD)
get_invocation_context().recorder = recorder
@@ -132,10 +116,7 @@ def tear_down_record_replay():
recorder = get_invocation_context().recorder
if recorder is not None:
if recorder.mode == RecorderMode.RECORD:
recorder.write()
if recorder.mode == RecorderMode.DIFF:
recorder.write()
recorder.write_diffs(diff_file_name="recording_diffs.json")
recorder.write("recording.json")
elif recorder.mode == RecorderMode.REPLAY:
recorder.write_diffs("replay_diffs.json")
@@ -179,11 +160,9 @@ def postflight(func):
process_in_blocks=rusage.ru_inblock,
process_out_blocks=rusage.ru_oublock,
),
(
EventLevel.INFO
if "flags" in ctx.obj and ctx.obj["flags"].SHOW_RESOURCE_REPORT
else None
),
else None,
)
fire_event(
@@ -324,20 +303,6 @@ def manifest(*args0, write=True, write_perf_info=False):
ctx = args[0]
assert isinstance(ctx, Context)
setup_manifest(ctx, write=write, write_perf_info=write_perf_info)
return func(*args, **kwargs)
return update_wrapper(wrapper, func)
# if there are no args, the decorator was used without params @decorator
# otherwise, the decorator was called with params @decorator(arg)
if len(args0) == 0:
return outer_wrapper
return outer_wrapper(args0[0])
def setup_manifest(ctx: Context, write: bool = True, write_perf_info: bool = False):
"""Load the manifest and add it to the context."""
req_strs = ["profile", "project", "runtime_config"]
reqs = [ctx.obj.get(dep) for dep in req_strs]
@@ -354,7 +319,18 @@ def setup_manifest(ctx: Context, write: bool = True, write_perf_info: bool = Fal
else:
register_adapter(runtime_config, get_mp_context())
adapter = get_adapter(runtime_config)
adapter.set_macro_context_generator(generate_runtime_macro_context) # type: ignore[arg-type]
adapter.set_macro_context_generator(generate_runtime_macro_context)
adapter.set_macro_resolver(ctx.obj["manifest"])
query_header_context = generate_query_header_context(adapter.config, ctx.obj["manifest"]) # type: ignore[attr-defined]
query_header_context = generate_query_header_context(
adapter.config, ctx.obj["manifest"]
)
adapter.connections.set_query_header(query_header_context)
return func(*args, **kwargs)
return update_wrapper(wrapper, func)
# if there are no args, the decorator was used without params @decorator
# otherwise, the decorator was called with params @decorator(arg)
if len(args0) == 0:
return outer_wrapper
return outer_wrapper(args0[0])

View File

@@ -1,50 +1,29 @@
import typing
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, Optional
import jinja2
from dbt.artifacts.resources import RefArgs
from dbt.exceptions import MacroNamespaceNotStringError, ParsingError
from dbt.exceptions import MacroNamespaceNotStringError
from dbt_common.clients.jinja import get_environment
from dbt_common.exceptions.macros import MacroNameNotStringError
from dbt_common.tests import test_caching_enabled
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
if typing.TYPE_CHECKING:
from dbt.context.providers import ParseDatabaseWrapper
_TESTING_MACRO_CACHE: Optional[Dict[str, Any]] = {}
_TESTING_MACRO_CACHE: Dict[str, Any] = {}
def statically_extract_has_name_this(source: str) -> bool:
"""Checks whether the raw jinja has any references to `this`"""
env = get_environment(None, capture_macros=True)
parsed = env.parse(source)
names = tuple(parsed.find_all(jinja2.nodes.Name))
for name in names:
if hasattr(name, "name") and name.name == "this":
return True
return False
def statically_extract_macro_calls(
source: str, ctx: Dict[str, Any], db_wrapper: Optional["ParseDatabaseWrapper"] = None
) -> List[str]:
def statically_extract_macro_calls(string, ctx, db_wrapper=None):
# set 'capture_macros' to capture undefined
env = get_environment(None, capture_macros=True)
global _TESTING_MACRO_CACHE
if test_caching_enabled() and source in _TESTING_MACRO_CACHE:
parsed = _TESTING_MACRO_CACHE.get(source, None)
if test_caching_enabled() and string in _TESTING_MACRO_CACHE:
parsed = _TESTING_MACRO_CACHE.get(string, None)
func_calls = getattr(parsed, "_dbt_cached_calls")
else:
parsed = env.parse(source)
parsed = env.parse(string)
func_calls = tuple(parsed.find_all(jinja2.nodes.Call))
if test_caching_enabled():
_TESTING_MACRO_CACHE[source] = parsed
_TESTING_MACRO_CACHE[string] = parsed
setattr(parsed, "_dbt_cached_calls", func_calls)
standard_calls = ["source", "ref", "config"]
@@ -88,9 +67,30 @@ def statically_extract_macro_calls(
return possible_macro_calls
def statically_parse_adapter_dispatch(
func_call, ctx: Dict[str, Any], db_wrapper: Optional["ParseDatabaseWrapper"]
) -> List[str]:
# Call(
# node=Getattr(
# node=Name(
# name='adapter',
# ctx='load'
# ),
# attr='dispatch',
# ctx='load'
# ),
# args=[
# Const(value='test_pkg_and_dispatch')
# ],
# kwargs=[
# Keyword(
# key='packages',
# value=Call(node=Getattr(node=Name(name='local_utils', ctx='load'),
# attr='_get_utils_namespaces', ctx='load'), args=[], kwargs=[],
# dyn_args=None, dyn_kwargs=None)
# )
# ],
# dyn_args=None,
# dyn_kwargs=None
# )
def statically_parse_adapter_dispatch(func_call, ctx, db_wrapper):
possible_macro_calls = []
# This captures an adapter.dispatch('<macro_name>') call.
@@ -142,7 +142,7 @@ def statically_parse_adapter_dispatch(
if db_wrapper:
macro = db_wrapper.dispatch(func_name, macro_namespace=macro_namespace).macro
func_name = f"{macro.package_name}.{macro.name}" # type: ignore[attr-defined]
func_name = f"{macro.package_name}.{macro.name}"
possible_macro_calls.append(func_name)
else: # this is only for tests/unit/test_macro_calls.py
if macro_namespace:
@@ -153,93 +153,3 @@ def statically_parse_adapter_dispatch(
possible_macro_calls.append(f"{package_name}.{func_name}")
return possible_macro_calls
def statically_parse_ref_or_source(expression: str) -> Union[RefArgs, List[str]]:
"""
Returns a RefArgs or List[str] object, corresponding to ref or source respectively, given an input jinja expression.
input: str representing how input node is referenced in tested model sql
* examples:
- "ref('my_model_a')"
- "ref('my_model_a', version=3)"
- "ref('package', 'my_model_a', version=3)"
- "source('my_source_schema', 'my_source_name')"
If input is not a well-formed jinja ref or source expression, a ParsingError is raised.
"""
ref_or_source: Union[RefArgs, List[str]]
try:
statically_parsed = py_extract_from_source(f"{{{{ {expression} }}}}")
except ExtractionError:
raise ParsingError(f"Invalid jinja expression: {expression}")
if statically_parsed.get("refs"):
raw_ref = list(statically_parsed["refs"])[0]
ref_or_source = RefArgs(
package=raw_ref.get("package"),
name=raw_ref.get("name"),
version=raw_ref.get("version"),
)
elif statically_parsed.get("sources"):
source_name, source_table_name = list(statically_parsed["sources"])[0]
ref_or_source = [source_name, source_table_name]
else:
raise ParsingError(f"Invalid ref or source expression: {expression}")
return ref_or_source
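A minimal sketch of the contract documented in the docstring above (assuming the side of the diff that defines this helper):

from dbt.clients.jinja_static import statically_parse_ref_or_source

statically_parse_ref_or_source("ref('my_model_a', version=3)")
# -> RefArgs with name='my_model_a', package=None, version=3

statically_parse_ref_or_source("source('my_source_schema', 'my_source_name')")
# -> ['my_source_schema', 'my_source_name']

statically_parse_ref_or_source("1 + 1")
# -> raises ParsingError: not a ref or source expression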
def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]:
"""
Given a string with jinja, extract an unrendered config call.
If no config call is present, returns None.
For example, given:
"{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"}
No config call:
"select 1 as id"
returns: None
"""
    # Return early to avoid creating a jinja environment if there is no config call in the input string
if "config(" not in string:
return None
# set 'capture_macros' to capture undefined
env = get_environment(None, capture_macros=True)
global _TESTING_MACRO_CACHE
if test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE:
parsed = _TESTING_MACRO_CACHE.get(string, None)
func_calls = getattr(parsed, "_dbt_cached_calls")
else:
parsed = env.parse(string)
func_calls = tuple(parsed.find_all(jinja2.nodes.Call))
config_func_calls = list(
filter(
lambda f: hasattr(f, "node") and hasattr(f.node, "name") and f.node.name == "config",
func_calls,
)
)
# There should only be one {{ config(...) }} call per input
config_func_call = config_func_calls[0] if config_func_calls else None
if not config_func_call:
return None
unrendered_config = {}
for kwarg in config_func_call.kwargs:
unrendered_config[kwarg.key] = construct_static_kwarg_value(kwarg)
return unrendered_config
def construct_static_kwarg_value(kwarg) -> str:
    # Instead of trying to re-assemble a complex kwarg value, simply stringify it.
# This is still useful to be able to detect changes in unrendered configs, even if it is
# not an exact representation of the user input.
return str(kwarg)
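Putting the two helpers above together (a minimal sketch on the side of the diff that defines them):

from dbt.clients.jinja_static import statically_parse_unrendered_config

raw = "{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
cfg = statically_parse_unrendered_config(raw)
# cfg["materialized"] is the stringified Keyword AST node,
# e.g. "Keyword(key='materialized', value=Call(node=Name(name='env_var', ...), ...))"

assert statically_parse_unrendered_config("select 1 as id") is None  # no config call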

View File

@@ -21,7 +21,6 @@ from dbt.contracts.graph.nodes import (
InjectedCTE,
ManifestNode,
ManifestSQLNode,
ModelNode,
SeedNode,
UnitTestDefinition,
UnitTestNode,
@@ -30,15 +29,12 @@ from dbt.events.types import FoundStats, WritingInjectedSQLForNode
from dbt.exceptions import (
DbtInternalError,
DbtRuntimeError,
ForeignKeyConstraintToSyntaxError,
GraphDependencyNotFoundError,
ParsingError,
)
from dbt.flags import get_flags
from dbt.graph import Graph
from dbt.node_types import ModelLanguage, NodeType
from dbt_common.clients.system import make_directory
from dbt_common.contracts.constraints import ConstraintType
from dbt_common.events.contextvars import get_node_info
from dbt_common.events.format import pluralize
from dbt_common.events.functions import fire_event
@@ -375,7 +371,7 @@ class Compiler:
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
new_cte_name = self.add_ephemeral_prefix(cte_model.identifier)
new_cte_name = self.add_ephemeral_prefix(cte_model.name)
rendered_sql = cte_model._pre_injected_sql or cte_model.compiled_code
sql = f" {new_cte_name} as (\n{rendered_sql}\n)"
@@ -441,31 +437,8 @@ class Compiler:
relation_name = str(relation_cls.create_from(self.config, node))
node.relation_name = relation_name
# Compile 'ref' and 'source' expressions in foreign key constraints
if isinstance(node, ModelNode):
for constraint in node.all_constraints:
if constraint.type == ConstraintType.foreign_key and constraint.to:
constraint.to = self._compile_relation_for_foreign_key_constraint_to(
manifest, node, constraint.to
)
return node
def _compile_relation_for_foreign_key_constraint_to(
self, manifest: Manifest, node: ManifestSQLNode, to_expression: str
) -> str:
try:
foreign_key_node = manifest.find_node_from_ref_or_source(to_expression)
except ParsingError:
raise ForeignKeyConstraintToSyntaxError(node, to_expression)
if not foreign_key_node:
raise GraphDependencyNotFoundError(node, to_expression)
adapter = get_adapter(self.config)
relation_name = str(adapter.Relation.create_from(self.config, foreign_key_node))
return relation_name
# This method doesn't actually "compile" any of the nodes. That is done by the
# "compile_node" method. This creates a Linker and builds the networkx graph,
# writes out the graph.gpickle file, and prints the stats, returning a Graph object.
@@ -521,9 +494,7 @@ class Compiler:
linker.write_graph(graph_path, manifest)
# writes the "compiled_code" into the target/compiled directory
def _write_node(
self, node: ManifestSQLNode, split_suffix: Optional[str] = None
) -> ManifestSQLNode:
def _write_node(self, node: ManifestSQLNode) -> ManifestSQLNode:
if not node.extra_ctes_injected or node.resource_type in (
NodeType.Snapshot,
NodeType.Seed,
@@ -532,9 +503,7 @@ class Compiler:
fire_event(WritingInjectedSQLForNode(node_info=get_node_info()))
if node.compiled_code:
node.compiled_path = node.get_target_write_path(
self.config.target_path, "compiled", split_suffix
)
node.compiled_path = node.get_target_write_path(self.config.target_path, "compiled")
node.write_node(self.config.project_root, node.compiled_path, node.compiled_code)
return node
@@ -544,7 +513,6 @@ class Compiler:
manifest: Manifest,
extra_context: Optional[Dict[str, Any]] = None,
write: bool = True,
split_suffix: Optional[str] = None,
) -> ManifestSQLNode:
"""This is the main entry point into this code. It's called by
CompileRunner.compile, GenericRPCRunner.compile, and
@@ -552,8 +520,6 @@ class Compiler:
the node's raw_code into compiled_code, and then calls the
recursive method to "prepend" the ctes.
"""
# REVIEW: UnitTestDefinition shouldn't be possible here because of the
# type of node, and it is likewise an invalid return type.
if isinstance(node, UnitTestDefinition):
return node
@@ -567,7 +533,7 @@ class Compiler:
node, _ = self._recursively_prepend_ctes(node, manifest, extra_context)
if write:
self._write_node(node, split_suffix=split_suffix)
self._write_node(node)
return node

View File

@@ -10,7 +10,7 @@ from dbt import deprecations
from dbt.adapters.contracts.connection import QueryComment
from dbt.clients.yaml_helper import load_yaml_text
from dbt.config.selectors import SelectorDict
from dbt.config.utils import normalize_warn_error_options
from dbt.config.utils import exclusive_primary_alt_value_setting
from dbt.constants import (
DBT_PROJECT_FILE_NAME,
DEPENDENCIES_FILE_NAME,
@@ -158,8 +158,14 @@ def _parse_versions(versions: Union[List[str], str]) -> List[VersionSpecifier]:
return [VersionSpecifier.from_version_string(v) for v in versions]
def _all_source_paths(*args: List[str]) -> List[str]:
paths = chain(*args)
def _all_source_paths(
model_paths: List[str],
seed_paths: List[str],
snapshot_paths: List[str],
analysis_paths: List[str],
macro_paths: List[str],
) -> List[str]:
paths = chain(model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths)
# Strip trailing slashes since the path is the same even though the name is not
stripped_paths = map(lambda s: s.rstrip("/"), paths)
return list(set(stripped_paths))
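The dedupe-and-strip behavior is easy to see in isolation; this is a self-contained mimic, not an import of the private helper:

from itertools import chain

def all_source_paths(*path_lists):
    # Trailing slashes are stripped so "models/" and "models" dedupe to one entry.
    return list({p.rstrip("/") for p in chain(*path_lists)})

print(all_source_paths(["models/"], ["models", "seeds"]))
# ['models', 'seeds']  (order not guaranteed: set-based dedupe)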
@@ -403,7 +409,7 @@ class PartialProject(RenderComponents):
snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ["snapshots"])
all_source_paths: List[str] = _all_source_paths(
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths, test_paths
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths
)
docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
@@ -474,7 +480,6 @@ class PartialProject(RenderComponents):
rendered.selectors_dict["selectors"]
)
dbt_cloud = cfg.dbt_cloud
flags: Dict[str, Any] = cfg.flags
project = Project(
project_name=name,
@@ -519,7 +524,6 @@ class PartialProject(RenderComponents):
project_env_vars=project_env_vars,
restrict_access=cfg.restrict_access,
dbt_cloud=dbt_cloud,
flags=flags,
)
# sanity check - this means an internal issue
project.validate()
@@ -564,6 +568,11 @@ class PartialProject(RenderComponents):
) = package_and_project_data_from_root(project_root)
selectors_dict = selector_data_from_root(project_root)
if "flags" in project_dict:
# We don't want to include "flags" in the Project,
# it goes in ProjectFlags
project_dict.pop("flags")
return cls.from_dicts(
project_root=project_root,
project_dict=project_dict,
@@ -636,7 +645,6 @@ class Project:
project_env_vars: Dict[str, Any]
restrict_access: bool
dbt_cloud: Dict[str, Any]
flags: Dict[str, Any]
@property
def all_source_paths(self) -> List[str]:
@@ -646,7 +654,6 @@ class Project:
self.snapshot_paths,
self.analysis_paths,
self.macro_paths,
self.test_paths,
)
@property
@@ -717,7 +724,6 @@ class Project:
"require-dbt-version": [v.to_version_string() for v in self.dbt_version],
"restrict-access": self.restrict_access,
"dbt-cloud": self.dbt_cloud,
"flags": self.flags,
}
)
if self.query_comment:
@@ -815,15 +821,20 @@ def read_project_flags(project_dir: str, profiles_dir: str) -> ProjectFlags:
if profile_project_flags:
# This can't use WARN_ERROR or WARN_ERROR_OPTIONS because they're in
    # the config that we're loading. Uses the special "buffer" method; the deprecation is fired after flags are initialized in preflight.
deprecations.buffer("project-flags-moved")
# the config that we're loading. Uses special "warn" method.
deprecations.warn("project-flags-moved")
project_flags = profile_project_flags
if project_flags is not None:
# handle collapsing `include` and `error` as well as collapsing `exclude` and `warn`
# for warn_error_options
warn_error_options = project_flags.get("warn_error_options", {})
normalize_warn_error_options(warn_error_options)
warn_error_options = project_flags.get("warn_error_options")
exclusive_primary_alt_value_setting(
warn_error_options, "include", "error", "warn_error_options"
)
exclusive_primary_alt_value_setting(
warn_error_options, "exclude", "warn", "warn_error_options"
)
ProjectFlags.validate(project_flags)
return ProjectFlags.from_dict(project_flags)

View File

@@ -1,8 +1,7 @@
import itertools
import os
from copy import deepcopy
from dataclasses import dataclass, field
from datetime import datetime
from dataclasses import dataclass
from pathlib import Path
from typing import (
Any,
@@ -16,8 +15,6 @@ from typing import (
Type,
)
import pytz
from dbt import tracking
from dbt.adapters.contracts.connection import (
AdapterRequiredConfig,
@@ -101,7 +98,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
profile_name: str
cli_vars: Dict[str, Any]
dependencies: Optional[Mapping[str, "RuntimeConfig"]] = None
invoked_at: datetime = field(default_factory=lambda: datetime.now(pytz.UTC))
def __post_init__(self):
self.validate()
@@ -197,7 +193,6 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
log_cache_events=log_cache_events,
dependencies=dependencies,
dbt_cloud=project.dbt_cloud,
flags=project.flags,
)
# Called by 'load_projects' in this class
@@ -295,9 +290,9 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
project_name=self.project_name,
project_id=self.hashed_name(),
user_id=tracking.active_user.id if tracking.active_user else None,
send_anonymous_usage_stats=(
get_flags().SEND_ANONYMOUS_USAGE_STATS if tracking.active_user else None
),
send_anonymous_usage_stats=get_flags().SEND_ANONYMOUS_USAGE_STATS
if tracking.active_user
else None,
adapter_type=self.credentials.type,
)

View File

@@ -49,18 +49,5 @@ def exclusive_primary_alt_value_setting(
f"Only `{alt}` or `{primary}` can be specified{where}, not both"
)
if alt in dictionary:
alt_value = dictionary.pop(alt)
dictionary[primary] = alt_value
def normalize_warn_error_options(warn_error_options: Dict[str, Any]) -> None:
exclusive_primary_alt_value_setting(
warn_error_options, "include", "error", "warn_error_options"
)
exclusive_primary_alt_value_setting(
warn_error_options, "exclude", "warn", "warn_error_options"
)
for key in ("include", "exclude", "silence"):
if key in warn_error_options and warn_error_options[key] is None:
warn_error_options[key] = []
if alt_options:
dictionary[primary] = alt_options
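On the side of the diff that defines it, normalize_warn_error_options collapses the alias keys in place and replaces None values with empty lists (a minimal sketch, assuming that version of dbt.config.utils is importable):

from dbt.config.utils import normalize_warn_error_options

opts = {"error": ["Deprecations"], "warn": None}
normalize_warn_error_options(opts)  # mutates the dict in place
# opts == {"include": ["Deprecations"], "exclude": []}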

View File

@@ -1,5 +1,3 @@
from dbt_semantic_interfaces.type_enums import TimeGranularity
DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER"
SECRET_PLACEHOLDER = "$$$DBT_SECRET_START$$${}$$$DBT_SECRET_END$$$"
@@ -17,8 +15,5 @@ DEPENDENCIES_FILE_NAME = "dependencies.yml"
PACKAGE_LOCK_FILE_NAME = "package-lock.yml"
MANIFEST_FILE_NAME = "manifest.json"
SEMANTIC_MANIFEST_FILE_NAME = "semantic_manifest.json"
LEGACY_TIME_SPINE_MODEL_NAME = "metricflow_time_spine"
LEGACY_TIME_SPINE_GRANULARITY = TimeGranularity.DAY
MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY = TimeGranularity.DAY
PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack"
PACKAGE_LOCK_HASH_KEY = "sha1_hash"

View File

@@ -6,12 +6,9 @@ from typing import Any, Dict, Generic, Iterator, List, Optional, TypeVar
from dbt.adapters.factory import get_config_class_by_name
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.contracts.graph.model_config import get_config_for
from dbt.exceptions import SchemaConfigError
from dbt.flags import get_flags
from dbt.node_types import NodeType
from dbt.utils import fqn_search
from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
from dbt_common.dataclass_schema import ValidationError
from dbt_common.contracts.config.base import BaseConfig, _listify
from dbt_common.exceptions import DbtInternalError
@@ -30,7 +27,8 @@ class ConfigSource:
def __init__(self, project):
self.project = project
def get_config_dict(self, resource_type: NodeType): ...
def get_config_dict(self, resource_type: NodeType):
...
class UnrenderedConfig(ConfigSource):
@@ -132,12 +130,12 @@ class BaseContextConfigGenerator(Generic[T]):
return self._project_configs(self._active_project, fqn, resource_type)
@abstractmethod
def _update_from_config(
self, result: T, partial: Dict[str, Any], validate: bool = False
) -> T: ...
def _update_from_config(self, result: T, partial: Dict[str, Any], validate: bool = False) -> T:
...
@abstractmethod
def initial_result(self, resource_type: NodeType, base: bool) -> T: ...
def initial_result(self, resource_type: NodeType, base: bool) -> T:
...
def calculate_node_config(
self,
@@ -183,7 +181,8 @@ class BaseContextConfigGenerator(Generic[T]):
project_name: str,
base: bool,
patch_config_dict: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]: ...
) -> Dict[str, Any]:
...
class ContextConfigGenerator(BaseContextConfigGenerator[C]):
@@ -239,12 +238,8 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]):
base=base,
patch_config_dict=patch_config_dict,
)
try:
finalized = config.finalize_and_validate()
return finalized.to_dict(omit_none=True)
except ValidationError as exc:
# we got a ValidationError - probably bad types in config()
raise SchemaConfigError(exc, node=config) from exc
class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]):
@@ -293,7 +288,6 @@ class ContextConfig:
project_name: str,
) -> None:
self._config_call_dict: Dict[str, Any] = {}
self._unrendered_config_call_dict: Dict[str, Any] = {}
self._active_project = active_project
self._fqn = fqn
self._resource_type = resource_type
@@ -301,11 +295,55 @@ class ContextConfig:
def add_config_call(self, opts: Dict[str, Any]) -> None:
dct = self._config_call_dict
merge_config_dicts(dct, opts)
self._add_config_call(dct, opts)
def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None:
# Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types.
self._unrendered_config_call_dict.update(opts)
@classmethod
def _add_config_call(cls, config_call_dict, opts: Dict[str, Any]) -> None:
# config_call_dict is already encountered configs, opts is new
# This mirrors code in _merge_field_value in model_config.py which is similar but
# operates on config objects.
for k, v in opts.items():
# MergeBehavior for post-hook and pre-hook is to collect all
# values, instead of overwriting
if k in BaseConfig.mergebehavior["append"]:
if not isinstance(v, list):
v = [v]
if k in config_call_dict: # should always be a list here
config_call_dict[k].extend(v)
else:
config_call_dict[k] = v
elif k in BaseConfig.mergebehavior["update"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in config_call_dict and isinstance(config_call_dict[k], dict):
config_call_dict[k].update(v)
else:
config_call_dict[k] = v
elif k in BaseConfig.mergebehavior["dict_key_append"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in config_call_dict: # should always be a dict
for key, value in v.items():
extend = False
# This might start with a +, to indicate we should extend the list
# instead of just clobbering it
if key.startswith("+"):
extend = True
if key in config_call_dict[k] and extend:
# extend the list
config_call_dict[k][key].extend(_listify(value))
else:
# clobber the list
config_call_dict[k][key] = _listify(value)
else:
# This is always a dictionary
config_call_dict[k] = v
# listify everything
for key, value in config_call_dict[k].items():
config_call_dict[k][key] = _listify(value)
else:
config_call_dict[k] = v
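A self-contained mimic of the three merge behaviors above; the real key tables live on BaseConfig.mergebehavior, and the keys used here are only illustrative:

def merge_config_call(acc: dict, opts: dict) -> None:
    append_keys = {"post-hook", "pre-hook"}  # illustrative "append" keys
    update_keys = {"meta"}                   # illustrative "update" key
    for k, v in opts.items():
        if k in append_keys:
            acc.setdefault(k, []).extend(v if isinstance(v, list) else [v])
        elif k in update_keys:
            acc.setdefault(k, {}).update(v)
        else:
            acc[k] = v  # default: clobber

acc: dict = {}
merge_config_call(acc, {"post-hook": "analyze {{ this }}", "meta": {"owner": "data"}})
merge_config_call(acc, {"post-hook": ["vacuum {{ this }}"], "meta": {"tier": 1}})
# acc == {"post-hook": ["analyze {{ this }}", "vacuum {{ this }}"],
#         "meta": {"owner": "data", "tier": 1}}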
def build_config_dict(
self,
@@ -317,24 +355,12 @@ class ContextConfig:
if rendered:
# TODO CT-211
src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated]
config_call_dict = self._config_call_dict
else:
# TODO CT-211
src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment]
# preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict
if get_flags().state_modified_compare_more_unrendered_values is False:
config_call_dict = self._config_call_dict
else:
# Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not,
# as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc)
if self._config_call_dict and not self._unrendered_config_call_dict:
config_call_dict = self._config_call_dict
else:
config_call_dict = self._unrendered_config_call_dict
return src.calculate_node_config_dict(
config_call_dict=config_call_dict,
config_call_dict=self._config_call_dict,
fqn=self._fqn,
resource_type=self._resource_type,
project_name=self._project_name,

View File

@@ -8,7 +8,7 @@ from dbt.adapters.exceptions import (
RelationWrongTypeError,
)
from dbt.adapters.exceptions.cache import CacheInconsistencyError
from dbt.events.types import JinjaLogWarning, SnapshotTimestampWarning
from dbt.events.types import JinjaLogWarning
from dbt.exceptions import (
AmbiguousAliasError,
AmbiguousCatalogMatchError,
@@ -116,17 +116,6 @@ def raise_fail_fast_error(msg, node=None) -> NoReturn:
raise FailFastError(msg, node=node)
def warn_snapshot_timestamp_data_types(
snapshot_time_data_type: str, updated_at_data_type: str
) -> None:
warn_or_error(
SnapshotTimestampWarning(
snapshot_time_data_type=snapshot_time_data_type,
updated_at_data_type=updated_at_data_type,
)
)
# Update this when a new function should be added to the
# dbt context's `exceptions` key!
CONTEXT_EXPORTS = {
@@ -152,7 +141,6 @@ CONTEXT_EXPORTS = {
raise_contract_error,
column_type_missing,
raise_fail_fast_error,
warn_snapshot_timestamp_data_types,
]
}

View File

@@ -20,7 +20,6 @@ from typing_extensions import Protocol
from dbt import selected_resources
from dbt.adapters.base.column import Column
from dbt.adapters.base.relation import EventTimeFilter
from dbt.adapters.contracts.connection import AdapterResponse
from dbt.adapters.exceptions import MissingConfigError
from dbt.adapters.factory import (
@@ -28,14 +27,13 @@ from dbt.adapters.factory import (
get_adapter_package_names,
get_adapter_type_names,
)
from dbt.artifacts.resources import NodeConfig, NodeVersion, RefArgs, SourceConfig
from dbt.artifacts.resources import NodeVersion, RefArgs
from dbt.clients.jinja import (
MacroGenerator,
MacroStack,
UnitTestMacroGenerator,
get_rendered,
)
from dbt.clients.jinja_static import statically_parse_unrendered_config
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.constants import DEFAULT_ENV_PLACEHOLDER
from dbt.context.base import Var, contextmember, contextproperty
@@ -52,7 +50,6 @@ from dbt.contracts.graph.nodes import (
Exposure,
Macro,
ManifestNode,
ModelNode,
Resource,
SeedNode,
SemanticModel,
@@ -79,8 +76,6 @@ from dbt.exceptions import (
SecretEnvVarLocationError,
TargetNotFoundError,
)
from dbt.flags import get_flags
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
from dbt.node_types import ModelLanguage, NodeType
from dbt.utils import MultiDict, args_to_dict
from dbt_common.clients.jinja import MacroProtocol
@@ -235,29 +230,6 @@ class BaseResolver(metaclass=abc.ABCMeta):
def resolve_limit(self) -> Optional[int]:
return 0 if getattr(self.config.args, "EMPTY", False) else None
def resolve_event_time_filter(self, target: ManifestNode) -> Optional[EventTimeFilter]:
event_time_filter = None
if (
(isinstance(target.config, NodeConfig) or isinstance(target.config, SourceConfig))
and target.config.event_time
and isinstance(self.model, ModelNode)
and self.model.config.materialized == "incremental"
and self.model.config.incremental_strategy == "microbatch"
and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
and self.model.batch is not None
):
start = self.model.batch.event_time_start
end = self.model.batch.event_time_end
if start is not None or end is not None:
event_time_filter = EventTimeFilter(
field_name=target.config.event_time,
start=start,
end=end,
)
return event_time_filter
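Stripped of the node-type checks, the gating above reduces to: only emit a filter when the target declares an event_time and the current batch has at least one bound. A simplified stand-in, not the real resolver:

from dataclasses import dataclass
from datetime import datetime
from typing import Optional

@dataclass
class EventTimeFilter:
    field_name: str
    start: Optional[datetime] = None
    end: Optional[datetime] = None

def maybe_event_time_filter(event_time, start, end):
    # illustrative reduction of the checks above
    if event_time and (start is not None or end is not None):
        return EventTimeFilter(field_name=event_time, start=start, end=end)
    return None

print(maybe_event_time_filter("created_at", datetime(2024, 1, 1), None))
# EventTimeFilter(field_name='created_at', start=datetime(2024, 1, 1, 0, 0), end=None)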
@abc.abstractmethod
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
pass
@@ -267,7 +239,8 @@ class BaseRefResolver(BaseResolver):
@abc.abstractmethod
def resolve(
self, name: str, package: Optional[str] = None, version: Optional[NodeVersion] = None
) -> RelationProxy: ...
) -> RelationProxy:
...
def _repack_args(
self, name: str, package: Optional[str], version: Optional[NodeVersion]
@@ -333,7 +306,8 @@ class BaseSourceResolver(BaseResolver):
class BaseMetricResolver(BaseResolver):
@abc.abstractmethod
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference: ...
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
...
def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
if package is None:
@@ -367,7 +341,8 @@ class BaseMetricResolver(BaseResolver):
class Config(Protocol):
def __init__(self, model, context_config: Optional[ContextConfig]): ...
def __init__(self, model, context_config: Optional[ContextConfig]):
...
# Implementation of "config(..)" calls in models
@@ -399,14 +374,6 @@ class ParseConfigObject(Config):
# not call it!
if self.context_config is None:
raise DbtRuntimeError("At parse time, did not receive a context config")
# Track unrendered opts to build parsed node unrendered_config later on
if get_flags().state_modified_compare_more_unrendered_values:
unrendered_config = statically_parse_unrendered_config(self.model.raw_code)
if unrendered_config:
self.context_config.add_unrendered_config_call(unrendered_config)
# Use rendered opts to populate context_config
self.context_config.add_config_call(opts)
return ""
@@ -581,11 +548,7 @@ class RuntimeRefResolver(BaseRefResolver):
def create_relation(self, target_model: ManifestNode) -> RelationProxy:
if target_model.is_ephemeral_model:
self.model.set_cte(target_model.unique_id, None)
return self.Relation.create_ephemeral_from(
target_model,
limit=self.resolve_limit,
event_time_filter=self.resolve_event_time_filter(target_model),
)
return self.Relation.create_ephemeral_from(target_model, limit=self.resolve_limit)
elif (
hasattr(target_model, "defer_relation")
and target_model.defer_relation
@@ -603,18 +566,10 @@ class RuntimeRefResolver(BaseRefResolver):
)
):
return self.Relation.create_from(
self.config,
target_model.defer_relation,
limit=self.resolve_limit,
event_time_filter=self.resolve_event_time_filter(target_model),
self.config, target_model.defer_relation, limit=self.resolve_limit
)
else:
return self.Relation.create_from(
self.config,
target_model,
limit=self.resolve_limit,
event_time_filter=self.resolve_event_time_filter(target_model),
)
return self.Relation.create_from(self.config, target_model, limit=self.resolve_limit)
def validate(
self,
@@ -648,11 +603,6 @@ class OperationRefResolver(RuntimeRefResolver):
class RuntimeUnitTestRefResolver(RuntimeRefResolver):
@property
def resolve_limit(self) -> Optional[int]:
# Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
return None
def resolve(
self,
target_name: str,
@@ -686,27 +636,10 @@ class RuntimeSourceResolver(BaseSourceResolver):
target_kind="source",
disabled=(isinstance(target_source, Disabled)),
)
# Source quoting does _not_ respect global configs in dbt_project.yml, as documented here:
# https://docs.getdbt.com/reference/project-configs/quoting
# Use an object with an empty quoting field to bypass any settings in self.
class SourceQuotingBaseConfig:
quoting: Dict[str, Any] = {}
return self.Relation.create_from(
SourceQuotingBaseConfig(),
target_source,
limit=self.resolve_limit,
event_time_filter=self.resolve_event_time_filter(target_source),
)
return self.Relation.create_from(self.config, target_source, limit=self.resolve_limit)
class RuntimeUnitTestSourceResolver(BaseSourceResolver):
@property
def resolve_limit(self) -> Optional[int]:
# Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
return None
def resolve(self, source_name: str, table_name: str):
target_source = self.manifest.resolve_source(
source_name,
@@ -1003,20 +936,7 @@ class ProviderContext(ManifestContext):
# macros/source defs aren't 'writeable'.
if isinstance(self.model, (Macro, SourceDefinition)):
raise MacrosSourcesUnWriteableError(node=self.model)
split_suffix = None
if (
isinstance(self.model, ModelNode)
and self.model.config.get("incremental_strategy") == "microbatch"
):
split_suffix = MicrobatchBuilder.format_batch_start(
self.model.config.get("__dbt_internal_microbatch_event_time_start"),
self.model.config.batch_size,
)
self.model.build_path = self.model.get_target_write_path(
self.config.target_path, "run", split_suffix=split_suffix
)
self.model.build_path = self.model.get_target_write_path(self.config.target_path, "run")
self.model.write_node(self.config.project_root, self.model.build_path, payload)
return ""
@@ -1057,8 +977,7 @@ class ProviderContext(ManifestContext):
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model)
# this is used by some adapters
table.original_abspath = os.path.abspath(path) # type: ignore
table.original_abspath = os.path.abspath(path)
return table
@contextproperty()
@@ -1680,7 +1599,7 @@ class UnitTestContext(ModelContext):
if self.model.this_input_node_unique_id:
this_node = self.manifest.expect(self.model.this_input_node_unique_id)
self.model.set_cte(this_node.unique_id, None) # type: ignore
return self.adapter.Relation.add_ephemeral_prefix(this_node.identifier) # type: ignore
return self.adapter.Relation.add_ephemeral_prefix(this_node.name)
return None

View File

@@ -4,7 +4,7 @@
## Artifacts
### Generating JSON schemas
A helper script, `scripts/collect-artifact-schema.py` is available to generate json schemas corresponding to versioned artifacts (`ArtifactMixin`s).
A helper script, `scripts/collect-artifact-schema.py` is available to generate json schemas corresponding to versioned artifacts (`ArtifactMixin`s).
This script is necessary to run when a new artifact schema version is created, or when changes are made to existing artifact versions, and writes json schema to `schema/dbt/<artifact>/v<version>.json`.

View File

@@ -192,14 +192,8 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
snapshots: List[str] = field(default_factory=list)
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.
# metrics generated from semantic_model measures
generated_metrics: List[str] = field(default_factory=list)
# metrics generated from semantic_model measures. The key is
# the name of the semantic_model, so that we can find it later.
metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
groups: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
@@ -213,9 +207,6 @@ class SchemaSourceFile(BaseSourceFile):
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
unrendered_configs: Dict[str, Any] = field(default_factory=dict)
unrendered_databases: Dict[str, Any] = field(default_factory=dict)
unrendered_schemas: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None
@@ -268,40 +259,6 @@ class SchemaSourceFile(BaseSourceFile):
return self.data_tests[yaml_key][name]
return []
def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
if self.generated_metrics:
            # Probably not needed, but for safety's sake, convert the
# old generated_metrics to metrics_from_measures.
self.fix_metrics_from_measures()
if semantic_model_name not in self.metrics_from_measures:
self.metrics_from_measures[semantic_model_name] = []
self.metrics_from_measures[semantic_model_name].append(metric_unique_id)
def fix_metrics_from_measures(self):
# Temporary method to fix up existing projects with a partial parse file.
        # This should only be called if a SchemaSourceFile in a msgpack-packed
        # manifest has an existing "generated_metrics" list, to turn it into a
        # "metrics_from_measures" dictionary, so that we can
        # correctly partially parse.
# This code can be removed when "generated_metrics" is removed.
generated_metrics = self.generated_metrics
self.generated_metrics = [] # Should never be needed again
# For each metric_unique_id we loop through the semantic models
# looking for the name of the "measure" which generated the metric.
# When it's found, add it to "metrics_from_measures", with a key
# of the semantic_model name, and a list of metrics.
for metric_unique_id in generated_metrics:
parts = metric_unique_id.split(".")
# get the metric_name
metric_name = parts[-1]
if "semantic_models" in self.dict_from_yaml:
for sem_model in self.dict_from_yaml["semantic_models"]:
if "measures" in sem_model:
for measure in sem_model["measures"]:
if measure["name"] == metric_name:
self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
break
def get_key_and_name_for_test(self, test_unique_id):
yaml_key = None
block_name = None
@@ -321,41 +278,6 @@ class SchemaSourceFile(BaseSourceFile):
test_ids.extend(self.data_tests[key][name])
return test_ids
def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
versioned_name = f"{name}_v{version}" if version is not None else name
if yaml_key not in self.unrendered_configs:
self.unrendered_configs[yaml_key] = {}
if versioned_name not in self.unrendered_configs[yaml_key]:
self.unrendered_configs[yaml_key][versioned_name] = unrendered_config
def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
versioned_name = f"{name}_v{version}" if version is not None else name
if yaml_key not in self.unrendered_configs:
return None
if versioned_name not in self.unrendered_configs[yaml_key]:
return None
return self.unrendered_configs[yaml_key][versioned_name]
def delete_from_unrendered_configs(self, yaml_key, name):
# We delete all unrendered_configs for this yaml_key/name because the
# entry has been scheduled for reparsing.
if self.get_unrendered_config(yaml_key, name):
del self.unrendered_configs[yaml_key][name]
# Delete all versioned keys associated with name
version_names_to_delete = []
for potential_version_name in self.unrendered_configs[yaml_key]:
if potential_version_name.startswith(f"{name}_v"):
version_names_to_delete.append(potential_version_name)
for version_name in version_names_to_delete:
del self.unrendered_configs[yaml_key][version_name]
if not self.unrendered_configs[yaml_key]:
del self.unrendered_configs[yaml_key]
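The bookkeeping above is a two-level dict keyed by yaml_key, then by a version-qualified name. A self-contained sketch of the add/get path:

unrendered_configs: dict = {}

def add_unrendered_config(cfg, yaml_key, name, version=None):
    versioned = f"{name}_v{version}" if version is not None else name
    # only the first config seen for a given key/name is kept
    unrendered_configs.setdefault(yaml_key, {}).setdefault(versioned, cfg)

add_unrendered_config({"materialized": "table"}, "models", "my_model", version=2)
# unrendered_configs == {"models": {"my_model_v2": {"materialized": "table"}}}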
def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
@@ -372,30 +294,6 @@ class SchemaSourceFile(BaseSourceFile):
if not self.env_vars[yaml_key]:
del self.env_vars[yaml_key]
def add_unrendered_database(self, yaml_key: str, name: str, unrendered_database: str) -> None:
if yaml_key not in self.unrendered_databases:
self.unrendered_databases[yaml_key] = {}
self.unrendered_databases[yaml_key][name] = unrendered_database
def get_unrendered_database(self, yaml_key: str, name: str) -> Optional[str]:
if yaml_key not in self.unrendered_databases:
return None
return self.unrendered_databases[yaml_key].get(name)
def add_unrendered_schema(self, yaml_key: str, name: str, unrendered_schema: str) -> None:
if yaml_key not in self.unrendered_schemas:
self.unrendered_schemas[yaml_key] = {}
self.unrendered_schemas[yaml_key][name] = unrendered_schema
def get_unrendered_schema(self, yaml_key: str, name: str) -> Optional[str]:
if yaml_key not in self.unrendered_schemas:
return None
return self.unrendered_schemas[yaml_key].get(name)
@dataclass
class FixtureSourceFile(BaseSourceFile):

View File

@@ -29,13 +29,11 @@ from dbt.adapters.exceptions import (
DuplicateMacroInPackageError,
DuplicateMaterializationNameError,
)
from dbt.adapters.factory import get_adapter_package_names
# to preserve import paths
from dbt.artifacts.resources import BaseResource, DeferRelation, NodeVersion, RefArgs
from dbt.artifacts.resources import BaseResource, DeferRelation, NodeVersion
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt.artifacts.schemas.manifest import ManifestMetadata, UniqueID, WritableManifest
from dbt.clients.jinja_static import statically_parse_ref_or_source
from dbt.contracts.files import (
AnySourceFile,
FileHash,
@@ -55,11 +53,10 @@ from dbt.contracts.graph.nodes import (
ManifestNode,
Metric,
ModelNode,
ResultNode,
SavedQuery,
SeedNode,
SemanticModel,
SingularTestNode,
SnapshotNode,
SourceDefinition,
UnitTestDefinition,
UnitTestFileFixture,
@@ -67,7 +64,7 @@ from dbt.contracts.graph.nodes import (
)
from dbt.contracts.graph.unparsed import SourcePatch, UnparsedVersion
from dbt.contracts.util import SourceKey
from dbt.events.types import ArtifactWritten, UnpinnedRefNewVersionAvailable
from dbt.events.types import UnpinnedRefNewVersionAvailable
from dbt.exceptions import (
AmbiguousResourceNameRefError,
CompilationError,
@@ -91,7 +88,7 @@ DocName = str
RefName = str
def find_unique_id_for_package(storage, key, package: Optional[PackageName]) -> Optional[UniqueID]:
def find_unique_id_for_package(storage, key, package: Optional[PackageName]):
if key not in storage:
return None
@@ -415,11 +412,11 @@ class DisabledLookup(dbtClassMixin):
self.storage: Dict[str, Dict[PackageName, List[Any]]] = {}
self.populate(manifest)
def populate(self, manifest: "Manifest"):
def populate(self, manifest):
for node in list(chain.from_iterable(manifest.disabled.values())):
self.add_node(node)
def add_node(self, node: GraphMemberNode) -> None:
def add_node(self, node):
if node.search_name not in self.storage:
self.storage[node.search_name] = {}
if node.package_name not in self.storage[node.search_name]:
@@ -429,12 +426,8 @@ class DisabledLookup(dbtClassMixin):
# This should return a list of disabled nodes. It's different from
# the other Lookup functions in that it returns full nodes, not just unique_ids
def find(
self,
search_name,
package: Optional[PackageName],
version: Optional[NodeVersion] = None,
resource_types: Optional[List[NodeType]] = None,
) -> Optional[List[Any]]:
self, search_name, package: Optional[PackageName], version: Optional[NodeVersion] = None
):
if version:
search_name = f"{search_name}.v{version}"
@@ -443,72 +436,22 @@ class DisabledLookup(dbtClassMixin):
pkg_dct: Mapping[PackageName, List[Any]] = self.storage[search_name]
nodes = []
if package is None:
if not pkg_dct:
return None
else:
nodes = next(iter(pkg_dct.values()))
return next(iter(pkg_dct.values()))
elif package in pkg_dct:
nodes = pkg_dct[package]
return pkg_dct[package]
else:
return None
if resource_types is None:
return nodes
else:
new_nodes = []
for node in nodes:
if node.resource_type in resource_types:
new_nodes.append(node)
if not new_nodes:
return None
else:
return new_nodes
class AnalysisLookup(RefableLookup):
_lookup_types: ClassVar[set] = set([NodeType.Analysis])
_versioned_types: ClassVar[set] = set()
class SingularTestLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest") -> None:
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)
def get_unique_id(self, search_name, package: Optional[PackageName]) -> Optional[UniqueID]:
return find_unique_id_for_package(self.storage, search_name, package)
def find(
self, search_name, package: Optional[PackageName], manifest: "Manifest"
) -> Optional[SingularTestNode]:
unique_id = self.get_unique_id(search_name, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None
def add_singular_test(self, source: SingularTestNode) -> None:
if source.search_name not in self.storage:
self.storage[source.search_name] = {}
self.storage[source.search_name][source.package_name] = source.unique_id
def populate(self, manifest: "Manifest") -> None:
for node in manifest.nodes.values():
if isinstance(node, SingularTestNode):
self.add_singular_test(node)
def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> SingularTestNode:
if unique_id not in manifest.nodes:
raise dbt_common.exceptions.DbtInternalError(
f"Singular test {unique_id} found in cache but not found in manifest"
)
node = manifest.nodes[unique_id]
assert isinstance(node, SingularTestNode)
return node
def _packages_to_search(
current_project: str,
node_package: str,
@@ -714,10 +657,10 @@ class MacroMethods:
self._macros_by_name = {}
self._macros_by_package = {}
def find_macro_candidate_by_name(
def find_macro_by_name(
self, name: str, root_project_name: str, package: Optional[str]
) -> Optional[MacroCandidate]:
"""Find a MacroCandidate in the graph by its name and package name, or None for
) -> Optional[Macro]:
"""Find a macro in the graph by its name and package name, or None for
any package. The root project name is used to determine priority:
- locally defined macros come first
- then imported macros
@@ -735,15 +678,7 @@ class MacroMethods:
filter=filter,
)
return candidates.last_candidate()
def find_macro_by_name(
self, name: str, root_project_name: str, package: Optional[str]
) -> Optional[Macro]:
macro_candidate = self.find_macro_candidate_by_name(
name=name, root_project_name=root_project_name, package=package
)
return macro_candidate.macro if macro_candidate else None
return candidates.last()
def find_generate_macro_by_name(
self, component: str, root_project_name: str, imported_package: Optional[str] = None
@@ -785,6 +720,9 @@ class MacroMethods:
filter: Optional[Callable[[MacroCandidate], bool]] = None,
) -> CandidateList:
"""Find macros by their name."""
# avoid an import cycle
from dbt.adapters.factory import get_adapter_package_names
candidates: CandidateList = CandidateList()
macros_by_name = self.get_macros_by_name()
@@ -916,9 +854,6 @@ class Manifest(MacroMethods, dbtClassMixin):
_analysis_lookup: Optional[AnalysisLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_singular_test_lookup: Optional[SingularTestLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_parsing_info: ParsingInfo = field(
default_factory=ParsingInfo,
metadata={"serialize": lambda x: None, "deserialize": lambda x: None},
@@ -1053,7 +988,6 @@ class Manifest(MacroMethods, dbtClassMixin):
self.metrics.values(),
self.semantic_models.values(),
self.saved_queries.values(),
self.unit_tests.values(),
)
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
@@ -1160,7 +1094,6 @@ class Manifest(MacroMethods, dbtClassMixin):
metrics=cls._map_resources_to_map_nodes(writable_manifest.metrics),
groups=cls._map_resources_to_map_nodes(writable_manifest.groups),
semantic_models=cls._map_resources_to_map_nodes(writable_manifest.semantic_models),
saved_queries=cls._map_resources_to_map_nodes(writable_manifest.saved_queries),
selectors={
selector_id: selector
for selector_id, selector in writable_manifest.selectors.items()
@@ -1227,9 +1160,7 @@ class Manifest(MacroMethods, dbtClassMixin):
)
def write(self, path):
writable = self.writable_manifest()
writable.write(path)
fire_event(ArtifactWritten(artifact_type=writable.__class__.__name__, artifact_path=path))
self.writable_manifest().write(path)
# Called in dbt.compilation.Linker.write_graph and
# dbt.graph.queue.get and ._include_in_cost
@@ -1316,16 +1247,29 @@ class Manifest(MacroMethods, dbtClassMixin):
self._analysis_lookup = AnalysisLookup(self)
return self._analysis_lookup
@property
def singular_test_lookup(self) -> SingularTestLookup:
if self._singular_test_lookup is None:
self._singular_test_lookup = SingularTestLookup(self)
return self._singular_test_lookup
@property
def external_node_unique_ids(self):
return [node.unique_id for node in self.nodes.values() if node.is_external_node]
def resolve_refs(
self,
source_node: ModelNode,
current_project: str, # TODO: ModelNode is overly restrictive typing
) -> List[MaybeNonSource]:
resolved_refs: List[MaybeNonSource] = []
for ref in source_node.refs:
resolved = self.resolve_ref(
source_node,
ref.name,
ref.package,
ref.version,
current_project,
source_node.package_name,
)
resolved_refs.append(resolved)
return resolved_refs
# Called by dbt.parser.manifest._process_refs & ManifestLoader.check_for_model_deprecations
def resolve_ref(
self,
@@ -1351,12 +1295,7 @@ class Manifest(MacroMethods, dbtClassMixin):
# it's possible that the node is disabled
if disabled is None:
disabled = self.disabled_lookup.find(
target_model_name,
pkg,
version=target_model_version,
resource_types=REFABLE_NODE_TYPES,
)
disabled = self.disabled_lookup.find(target_model_name, pkg, target_model_version)
if disabled:
return Disabled(disabled[0])
@@ -1505,10 +1444,8 @@ class Manifest(MacroMethods, dbtClassMixin):
return is_private_ref and (
not hasattr(node, "group")
or not node.group
# Invalid reference because group does not match
or node.group != target_model.group
# Or, invalid because these are different namespaces (project/package) and restrict-access is enforced
or (node.package_name != target_model.package_name and restrict_package_access)
or restrict_package_access
)
def is_invalid_protected_ref(
@@ -1613,14 +1550,12 @@ class Manifest(MacroMethods, dbtClassMixin):
if isinstance(node, GenericTestNode):
assert test_from
source_file.add_test(node.unique_id, test_from)
elif isinstance(node, Metric):
if isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
elif isinstance(node, Exposure):
if isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
elif isinstance(node, Group):
if isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(node, SnapshotNode):
source_file.snapshots.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass
else:
@@ -1631,15 +1566,13 @@ class Manifest(MacroMethods, dbtClassMixin):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)
def add_metric(
self, source_file: SchemaSourceFile, metric: Metric, generated_from: Optional[str] = None
):
def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
if not generated_from:
if not generated:
source_file.metrics.append(metric.unique_id)
else:
source_file.add_metrics_from_measures(generated_from, metric.unique_id)
source_file.generated_metrics.append(metric.unique_id)
def add_group(self, source_file: SchemaSourceFile, group: Group):
_check_duplicates(group, self.groups)
@@ -1653,7 +1586,7 @@ class Manifest(MacroMethods, dbtClassMixin):
else:
self.disabled[node.unique_id] = [node]
def add_disabled(self, source_file: AnySourceFile, node: GraphMemberNode, test_from=None):
def add_disabled(self, source_file: AnySourceFile, node: ResultNode, test_from=None):
self.add_disabled_nofile(node)
if isinstance(source_file, SchemaSourceFile):
if isinstance(node, GenericTestNode):
@@ -1667,8 +1600,6 @@ class Manifest(MacroMethods, dbtClassMixin):
source_file.semantic_models.append(node.unique_id)
if isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
if isinstance(node, UnitTestDefinition):
source_file.unit_tests.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass
else:
@@ -1703,22 +1634,6 @@ class Manifest(MacroMethods, dbtClassMixin):
# end of methods formerly in ParseResult
def find_node_from_ref_or_source(
self, expression: str
) -> Optional[Union[ModelNode, SourceDefinition]]:
ref_or_source = statically_parse_ref_or_source(expression)
node = None
if isinstance(ref_or_source, RefArgs):
node = self.ref_lookup.find(
ref_or_source.name, ref_or_source.package, ref_or_source.version, self
)
else:
source_name, source_table_name = ref_or_source[0], ref_or_source[1]
node = self.source_lookup.find(f"{source_name}.{source_table_name}", None, self)
return node
# Provide support for copy.deepcopy() - we just need to avoid the lock!
# pickle and deepcopy use this. It returns a callable object used to
# create the initial version of the object and a tuple of arguments
@@ -1753,37 +1668,18 @@ class Manifest(MacroMethods, dbtClassMixin):
self._semantic_model_by_measure_lookup,
self._disabled_lookup,
self._analysis_lookup,
self._singular_test_lookup,
)
return self.__class__, args
def _microbatch_macro_is_core(self, project_name: str) -> bool:
microbatch_is_core = False
candidate = self.find_macro_candidate_by_name(
name="get_incremental_microbatch_sql", root_project_name=project_name, package=None
)
# We want to check for "Core", because "Core" basically means "builtin"
if candidate is not None and candidate.locality == Locality.Core:
microbatch_is_core = True
return microbatch_is_core
def use_microbatch_batches(self, project_name: str) -> bool:
return (
get_flags().require_batched_execution_for_custom_microbatch_strategy
or self._microbatch_macro_is_core(project_name=project_name)
)
class MacroManifest(MacroMethods):
def __init__(self, macros) -> None:
self.macros = macros
self.metadata = ManifestMetadata(
user_id=tracking.active_user.id if tracking.active_user else None,
send_anonymous_usage_stats=(
get_flags().SEND_ANONYMOUS_USAGE_STATS if tracking.active_user else None
),
send_anonymous_usage_stats=get_flags().SEND_ANONYMOUS_USAGE_STATS
if tracking.active_user
else None,
)
# This is returned by the 'graph' context property
# in the ProviderContext class.

View File

@@ -1,8 +1,9 @@
from typing import Any, Dict, Iterator, List
from dbt.contracts.graph.manifest import Manifest, Metric
from dbt_semantic_interfaces.type_enums import MetricType
from dbt.contracts.graph.manifest import Manifest, Metric
DERIVED_METRICS = [MetricType.DERIVED, MetricType.RATIO]
BASE_METRICS = [MetricType.SIMPLE, MetricType.CUMULATIVE, MetricType.CONVERSION]

View File

@@ -39,6 +39,12 @@ class UnitTestNodeConfig(NodeConfig):
expected_sql: Optional[str] = None
@dataclass
class EmptySnapshotConfig(NodeConfig):
materialized: str = "snapshot"
unique_key: Optional[str] = None # override NodeConfig unique_key definition
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
NodeType.Metric: MetricConfig,
NodeType.SemanticModel: SemanticModelConfig,
@@ -56,6 +62,7 @@ RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
# base resource types are like resource types, except nothing has mandatory
# configs.
BASE_RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = RESOURCE_TYPES.copy()
BASE_RESOURCE_TYPES.update({NodeType.Snapshot: EmptySnapshotConfig})
def get_config_for(resource_type: NodeType, base=False) -> Type[BaseConfig]:

View File

@@ -2,7 +2,6 @@ import hashlib
import os
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import (
Any,
Dict,
@@ -19,8 +18,7 @@ from typing import (
from mashumaro.types import SerializableType
from dbt.adapters.base import ConstraintSupport
from dbt.adapters.factory import get_adapter_constraint_support
from dbt import deprecations
from dbt.artifacts.resources import Analysis as AnalysisResource
from dbt.artifacts.resources import (
BaseResource,
@@ -58,11 +56,8 @@ from dbt.artifacts.resources import SingularTest as SingularTestResource
from dbt.artifacts.resources import Snapshot as SnapshotResource
from dbt.artifacts.resources import SourceDefinition as SourceDefinitionResource
from dbt.artifacts.resources import SqlOperation as SqlOperationResource
from dbt.artifacts.resources import TimeSpine
from dbt.artifacts.resources import UnitTestDefinition as UnitTestDefinitionResource
from dbt.artifacts.schemas.batch_results import BatchResults
from dbt.clients.jinja_static import statically_extract_has_name_this
from dbt.contracts.graph.model_config import UnitTestNodeConfig
from dbt.contracts.graph.model_config import EmptySnapshotConfig, UnitTestNodeConfig
from dbt.contracts.graph.node_args import ModelNodeArgs
from dbt.contracts.graph.unparsed import (
HasYamlMetadata,
@@ -88,12 +83,7 @@ from dbt.node_types import (
NodeType,
)
from dbt_common.clients.system import write_file
from dbt_common.contracts.constraints import (
ColumnLevelConstraint,
ConstraintType,
ModelLevelConstraint,
)
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.contracts.constraints import ConstraintType
from dbt_common.events.contextvars import set_log_contextvars
from dbt_common.events.functions import warn_or_error
@@ -247,9 +237,7 @@ class NodeInfoMixin:
@dataclass
class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType):
def get_target_write_path(
self, target_path: str, subdirectory: str, split_suffix: Optional[str] = None
):
def get_target_write_path(self, target_path: str, subdirectory: str):
# This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory
if os.path.basename(self.path) == os.path.basename(self.original_file_path):
# One-to-one relationship of nodes to files.
@@ -257,15 +245,6 @@ class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, Serializabl
else:
# Many-to-one relationship of nodes to files.
path = os.path.join(self.original_file_path, self.path)
if split_suffix:
pathlib_path = Path(path)
path = str(
pathlib_path.parent
/ pathlib_path.stem
/ (pathlib_path.stem + f"_{split_suffix}" + pathlib_path.suffix)
)
target_write_path = os.path.join(target_path, subdirectory, self.package_name, path)
return target_write_path
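The split_suffix branch rewrites the node path into a per-batch file. A self-contained sketch of just that rewrite, with an illustrative suffix:

from pathlib import Path

def with_split_suffix(path: str, split_suffix: str) -> str:
    p = Path(path)
    # mirrors the pathlib logic above:
    # models/my_model.sql -> models/my_model/my_model_<suffix>.sql
    return str(p.parent / p.stem / (p.stem + f"_{split_suffix}" + p.suffix))

print(with_split_suffix("models/my_model.sql", "20240101T000000"))
# models/my_model/my_model_20240101T000000.sql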
@@ -443,40 +422,8 @@ class HookNode(HookNodeResource, CompiledNode):
return HookNodeResource
@dataclass
class BatchContext(dbtClassMixin):
id: str
event_time_start: datetime
event_time_end: datetime
def __post_serialize__(self, data, context):
# This is insane, but necessary, I apologize. Mashumaro handles the
# dictification of this class via a compile time generated `to_dict`
        # method based on the _typing_ of the class. By default `datetime`
# types are converted to strings. We don't want that, we want them to
# stay datetimes.
# Note: This is safe because the `BatchContext` isn't part of the artifact
# and thus doesn't get written out.
new_data = super().__post_serialize__(data, context)
new_data["event_time_start"] = self.event_time_start
new_data["event_time_end"] = self.event_time_end
return new_data
@dataclass
class ModelNode(ModelResource, CompiledNode):
previous_batch_results: Optional[BatchResults] = None
batch: Optional[BatchContext] = None
_has_this: Optional[bool] = None
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)
if "_has_this" in dct:
del dct["_has_this"]
if "previous_batch_results" in dct:
del dct["previous_batch_results"]
return dct
@classmethod
def resource_class(cls) -> Type[ModelResource]:
return ModelResource
@@ -522,13 +469,6 @@ class ModelNode(ModelResource, CompiledNode):
def is_latest_version(self) -> bool:
return self.version is not None and self.version == self.latest_version
@property
def is_past_deprecation_date(self) -> bool:
return (
self.deprecation_date is not None
and self.deprecation_date < datetime.now().astimezone()
)
@property
def search_name(self):
if self.version is None:
@@ -540,24 +480,6 @@ class ModelNode(ModelResource, CompiledNode):
def materialization_enforces_constraints(self) -> bool:
return self.config.materialized in ["table", "incremental"]
@property
def all_constraints(self) -> List[Union[ModelLevelConstraint, ColumnLevelConstraint]]:
constraints: List[Union[ModelLevelConstraint, ColumnLevelConstraint]] = []
for model_level_constraint in self.constraints:
constraints.append(model_level_constraint)
for column in self.columns.values():
for column_level_constraint in column.constraints:
constraints.append(column_level_constraint)
return constraints
@property
def has_this(self) -> bool:
if self._has_this is None:
self._has_this = statically_extract_has_name_this(self.raw_code)
return self._has_this
def infer_primary_key(self, data_tests: List["GenericTestNode"]) -> List[str]:
"""
Infers the columns that can be used as primary key of a model in the following order:
@@ -579,20 +501,11 @@ class ModelNode(ModelResource, CompiledNode):
columns_with_disabled_unique_tests = set()
columns_with_not_null_tests = set()
for test in data_tests:
columns: List[str] = []
            # extract columns from test kwargs, ensuring columns is a List[str] given tests can have custom (user or package-defined) kwarg types
if "column_name" in test.test_metadata.kwargs and isinstance(
test.test_metadata.kwargs["column_name"], str
):
columns = []
if "column_name" in test.test_metadata.kwargs:
columns = [test.test_metadata.kwargs["column_name"]]
elif "combination_of_columns" in test.test_metadata.kwargs and isinstance(
test.test_metadata.kwargs["combination_of_columns"], list
):
columns = [
column
for column in test.test_metadata.kwargs["combination_of_columns"]
if isinstance(column, str)
]
elif "combination_of_columns" in test.test_metadata.kwargs:
columns = test.test_metadata.kwargs["combination_of_columns"]
for column in columns:
if test.test_metadata.name in ["unique", "unique_combination_of_columns"]:
@@ -657,42 +570,6 @@ class ModelNode(ModelResource, CompiledNode):
data = contract_state.encode("utf-8")
self.contract.checksum = hashlib.new("sha256", data).hexdigest()
def same_contract_removed(self) -> bool:
"""
self: the removed (deleted, renamed, or disabled) model node
"""
# If the contract wasn't previously enforced, no contract change has occurred
if self.contract.enforced is False:
return True
# Removed node is past its deprecation_date, so deletion does not constitute a contract change
if self.is_past_deprecation_date:
return True
# Disabled, deleted, or renamed node with previously enforced contract.
if not self.config.enabled:
breaking_change = f"Contracted model '{self.unique_id}' was disabled."
else:
breaking_change = f"Contracted model '{self.unique_id}' was deleted or renamed."
if self.version is None:
warn_or_error(
UnversionedBreakingChange(
breaking_changes=[breaking_change],
model_name=self.name,
model_file_path=self.original_file_path,
),
node=self,
)
return False
else:
raise (
ContractBreakingChangeError(
breaking_changes=[breaking_change],
node=self,
)
)
def same_contract(self, old, adapter_type=None) -> bool:
# If the contract wasn't previously enforced:
if old.contract.enforced is False and self.contract.enforced is False:
@@ -714,9 +591,9 @@ class ModelNode(ModelResource, CompiledNode):
contract_enforced_disabled: bool = False
columns_removed: List[str] = []
column_type_changes: List[Dict[str, str]] = []
enforced_column_constraint_removed: List[Dict[str, str]] = (
[]
) # column_name, constraint_type
enforced_column_constraint_removed: List[
Dict[str, str]
] = [] # column_name, constraint_type
enforced_model_constraint_removed: List[Dict[str, Any]] = [] # constraint_type, columns
materialization_changed: List[str] = []
@@ -724,6 +601,10 @@ class ModelNode(ModelResource, CompiledNode):
# Breaking change: the contract was previously enforced, and it no longer is
contract_enforced_disabled = True
# TODO: this avoids the circular imports but isn't ideal
from dbt.adapters.base import ConstraintSupport
from dbt.adapters.factory import get_adapter_constraint_support
constraint_support = get_adapter_constraint_support(adapter_type)
column_constraints_exist = False
@@ -1119,6 +1000,19 @@ class UnitTestFileFixture(BaseNode):
# ====================================
@dataclass
class IntermediateSnapshotNode(CompiledNode):
# At an intermediate stage in parsing, where we've built something better
# than an unparsed node for rendering in parse mode, we may not yet have
# critical snapshot-related information that is only defined in config
# blocks. To fix that, we have an intermediate type that uses a regular node
# config, which the snapshot parser will then convert into a full
# ParsedSnapshotNode after rendering. Note: it currently does not work to
# set snapshot config in schema files because of the validation.
resource_type: Literal[NodeType.Snapshot]
config: EmptySnapshotConfig = field(default_factory=EmptySnapshotConfig)
@dataclass
class SnapshotNode(SnapshotResource, CompiledNode):
@classmethod
@@ -1198,7 +1092,7 @@ class UnpatchedSourceDefinition(BaseNode):
def get_source_representation(self):
return f'source("{self.source.name}", "{self.table.name}")'
def validate_data_tests(self, is_root_project: bool):
def validate_data_tests(self):
"""
sources parse tests differently than models, so we need to do some validation
here where it's done in the PatchParser for other nodes
@@ -1209,6 +1103,11 @@ class UnpatchedSourceDefinition(BaseNode):
"Invalid test config: cannot have both 'tests' and 'data_tests' defined"
)
if self.tests:
deprecations.warn(
"project-test-config",
deprecated_path="tests",
exp_path="data_tests",
)
self.data_tests.extend(self.tests)
self.tests.clear()
@@ -1219,6 +1118,11 @@ class UnpatchedSourceDefinition(BaseNode):
"Invalid test config: cannot have both 'tests' and 'data_tests' defined"
)
if column.tests:
deprecations.warn(
"project-test-config",
deprecated_path="tests",
exp_path="data_tests",
)
column.data_tests.extend(column.tests)
column.tests.clear()
@@ -1236,6 +1140,7 @@ class UnpatchedSourceDefinition(BaseNode):
return [] if self.table.columns is None else self.table.columns
def get_tests(self) -> Iterator[Tuple[Dict[str, Any], Optional[UnparsedColumn]]]:
self.validate_data_tests()
for data_test in self.data_tests:
yield normalize_test(data_test), None
@@ -1272,16 +1177,12 @@ class SourceDefinition(
return SourceDefinitionResource
def same_database_representation(self, other: "SourceDefinition") -> bool:
# preserve legacy behaviour -- use potentially rendered database
if get_flags().state_modified_compare_more_unrendered_values is False:
same_database = self.database == other.database
same_schema = self.schema == other.schema
else:
same_database = self.unrendered_database == other.unrendered_database
same_schema = self.unrendered_schema == other.unrendered_schema
return same_database and same_schema and self.identifier == other.identifier and True
return (
self.database == other.database
and self.schema == other.schema
and self.identifier == other.identifier
and True
)
def same_quoting(self, other: "SourceDefinition") -> bool:
return self.quoting == other.quoting
@@ -1520,13 +1421,6 @@ class Group(GroupResource, BaseNode):
def resource_class(cls) -> Type[GroupResource]:
return GroupResource
def to_logging_dict(self) -> Dict[str, Union[str, Dict[str, str]]]:
return {
"name": self.name,
"package_name": self.package_name,
"owner": self.owner.to_dict(omit_none=True),
}
# ====================================
# SemanticModel node
@@ -1627,16 +1521,18 @@ class SavedQuery(NodeInfoMixin, GraphNode, SavedQueryResource):
return self.group == old.group
def same_exports(self, old: "SavedQuery") -> bool:
# TODO: This isn't currently used in `same_contents` (nor called anywhere else)
if len(self.exports) != len(old.exports):
return False
# exports should be in the same order, so we zip them for easy iteration
for old_export, new_export in zip(old.exports, self.exports):
if not (old_export.name == new_export.name):
return False
keys = ["export_as", "schema", "alias"]
for key in keys:
if old_export.unrendered_config.get(key) != new_export.unrendered_config.get(key):
for (old_export, new_export) in zip(old.exports, self.exports):
if not (
old_export.name == new_export.name
and old_export.config.export_as == new_export.config.export_as
and old_export.config.schema_name == new_export.config.schema_name
and old_export.config.alias == new_export.config.alias
):
return False
return True
@@ -1655,7 +1551,6 @@ class SavedQuery(NodeInfoMixin, GraphNode, SavedQueryResource):
and self.same_label(old)
and self.same_config(old)
and self.same_group(old)
and self.same_exports(old)
and True
)
@@ -1685,7 +1580,6 @@ class ParsedNodePatch(ParsedPatch):
latest_version: Optional[NodeVersion]
constraints: List[Dict[str, Any]]
deprecation_date: Optional[datetime]
time_spine: Optional[TimeSpine] = None
@dataclass
@@ -1693,11 +1587,6 @@ class ParsedMacroPatch(ParsedPatch):
arguments: List[MacroArgument] = field(default_factory=list)
@dataclass
class ParsedSingularTestPatch(ParsedPatch):
pass
# ====================================
# Node unions/categories
# ====================================
@@ -1725,7 +1614,6 @@ ManifestNode = Union[
ResultNode = Union[
ManifestNode,
SourceDefinition,
HookNode,
]
# All nodes that can be in the DAG
@@ -1748,7 +1636,6 @@ Resource = Union[
TestNode = Union[SingularTestNode, GenericTestNode]
SemanticManifestNode = Union[SavedQuery, SemanticModel, Metric]
RESOURCE_CLASS_TO_NODE_CLASS: Dict[Type[BaseResource], Type[BaseNode]] = {
node_class.resource_class(): node_class

View File

@@ -1,21 +1,4 @@
from typing import List, Optional, Set
from dbt import deprecations
from dbt.constants import (
LEGACY_TIME_SPINE_GRANULARITY,
LEGACY_TIME_SPINE_MODEL_NAME,
MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY,
)
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import ModelNode
from dbt.events.types import ArtifactWritten, SemanticValidationFailure
from dbt.exceptions import ParsingError
from dbt.flags import get_flags
from dbt_common.clients.system import write_file
from dbt_common.events.base_types import EventLevel
from dbt_common.events.functions import fire_event
from dbt_semantic_interfaces.implementations.metric import PydanticMetric
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
from dbt_semantic_interfaces.implementations.project_configuration import (
PydanticProjectConfiguration,
)
@@ -24,27 +7,23 @@ from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel
from dbt_semantic_interfaces.implementations.time_spine import (
PydanticTimeSpine,
PydanticTimeSpineCustomGranularityColumn,
PydanticTimeSpinePrimaryColumn,
)
from dbt_semantic_interfaces.implementations.time_spine_table_configuration import (
PydanticTimeSpineTableConfiguration as LegacyTimeSpine,
PydanticTimeSpineTableConfiguration,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
ValidationError,
ValidationIssueContext,
)
from dbt.events.types import SemanticValidationFailure
from dbt.exceptions import ParsingError
from dbt_common.clients.system import write_file
from dbt_common.events.base_types import EventLevel
from dbt_common.events.functions import fire_event
class SemanticManifest:
def __init__(self, manifest: Manifest) -> None:
def __init__(self, manifest) -> None:
self.manifest = manifest
def validate(self) -> bool:
@@ -65,116 +44,23 @@ class SemanticManifest:
semantic_manifest = self._get_pydantic_semantic_manifest()
validator = SemanticManifestValidator[PydanticSemanticManifest]()
validation_results = validator.validate_semantic_manifest(semantic_manifest)
validation_result_errors = list(validation_results.errors)
metrics_using_old_params: Set[str] = set()
for metric in semantic_manifest.metrics or []:
for field in ("window", "grain_to_date"):
type_params_field_value = getattr(metric.type_params, field)
# Warn that the old type_params structure has been deprecated.
if type_params_field_value:
metrics_using_old_params.add(metric.name)
if metrics_using_old_params:
if get_flags().require_nested_cumulative_type_params is False:
deprecations.warn(
"mf-cumulative-type-params-deprecation",
)
else:
names = ", ".join(metrics_using_old_params)
validation_result_errors.append(
ValidationError(
context=ValidationIssueContext(
# We don't have the file context at this point.
file_context=FileContext(),
object_name=names,
object_type="metric",
),
message=f"Cumulative fields `type_params.window` and `type_params.grain_to_date` should be nested under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. Invalid metrics: {names}. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes.",
)
)
time_spines = semantic_manifest.project_configuration.time_spines
legacy_time_spines = (
semantic_manifest.project_configuration.time_spine_table_configurations
)
# If the time spine contains a day grain then it is functionally equivalent to the legacy time spine.
time_spines_contain_day = any(
c for c in time_spines if c.primary_column.time_granularity == TimeGranularity.DAY
)
if (
get_flags().require_yaml_configuration_for_mf_time_spines is False
and legacy_time_spines
and not time_spines_contain_day
):
deprecations.warn(
"mf-timespine-without-yaml-configuration",
)
for warning in validation_results.warnings:
fire_event(SemanticValidationFailure(msg=warning.message))
for error in validation_result_errors:
for error in validation_results.errors:
fire_event(SemanticValidationFailure(msg=error.message), EventLevel.ERROR)
return not validation_result_errors
return not validation_results.errors
def write_json_to_file(self, file_path: str):
semantic_manifest = self._get_pydantic_semantic_manifest()
json = semantic_manifest.json()
write_file(file_path, json)
fire_event(ArtifactWritten(artifact_type=self.__class__.__name__, artifact_path=file_path))
def _get_pydantic_semantic_manifest(self) -> PydanticSemanticManifest:
pydantic_time_spines: List[PydanticTimeSpine] = []
minimum_time_spine_granularity: Optional[TimeGranularity] = None
for node in self.manifest.nodes.values():
if not (isinstance(node, ModelNode) and node.time_spine):
continue
time_spine = node.time_spine
standard_granularity_column = None
for column in node.columns.values():
if column.name == time_spine.standard_granularity_column:
standard_granularity_column = column
break
# Assertions needed for type checking
if not standard_granularity_column:
raise ParsingError(
"Expected to find time spine standard granularity column in model columns, but did not. "
"This should have been caught in YAML parsing."
)
if not standard_granularity_column.granularity:
raise ParsingError(
"Expected to find granularity set for time spine standard granularity column, but did not. "
"This should have been caught in YAML parsing."
)
pydantic_time_spine = PydanticTimeSpine(
node_relation=PydanticNodeRelation(
alias=node.alias,
schema_name=node.schema,
database=node.database,
relation_name=node.relation_name,
),
primary_column=PydanticTimeSpinePrimaryColumn(
name=time_spine.standard_granularity_column,
time_granularity=standard_granularity_column.granularity,
),
custom_granularities=[
PydanticTimeSpineCustomGranularityColumn(
name=custom_granularity.name, column_name=custom_granularity.column_name
)
for custom_granularity in time_spine.custom_granularities
],
)
pydantic_time_spines.append(pydantic_time_spine)
if (
not minimum_time_spine_granularity
or standard_granularity_column.granularity.to_int()
< minimum_time_spine_granularity.to_int()
):
minimum_time_spine_granularity = standard_granularity_column.granularity
project_config = PydanticProjectConfiguration(
time_spine_table_configurations=[], time_spines=pydantic_time_spines
time_spine_table_configurations=[],
)
pydantic_semantic_manifest = PydanticSemanticManifest(
metrics=[], semantic_models=[], project_configuration=project_config
@@ -193,39 +79,25 @@ class SemanticManifest:
PydanticSavedQuery.parse_obj(saved_query.to_dict())
)
# Look for time-spine table model and create time spine table configuration
if self.manifest.semantic_models:
legacy_time_spine_model = self.manifest.ref_lookup.find(
LEGACY_TIME_SPINE_MODEL_NAME, None, None, self.manifest
)
if legacy_time_spine_model:
if (
not minimum_time_spine_granularity
or LEGACY_TIME_SPINE_GRANULARITY.to_int()
< minimum_time_spine_granularity.to_int()
):
minimum_time_spine_granularity = LEGACY_TIME_SPINE_GRANULARITY
# If no time spines have been configured at DAY or smaller AND legacy time spine model does not exist, error.
if (
not minimum_time_spine_granularity
or minimum_time_spine_granularity.to_int()
> MINIMUM_REQUIRED_TIME_SPINE_GRANULARITY.to_int()
):
# Get model for time_spine_table
time_spine_model_name = "metricflow_time_spine"
model = self.manifest.ref_lookup.find(time_spine_model_name, None, None, self.manifest)
if not model:
raise ParsingError(
"The semantic layer requires a time spine model with granularity DAY or smaller in the project, "
"but none was found. Guidance on creating this model can be found on our docs site "
"(https://docs.getdbt.com/docs/build/metricflow-time-spine)."
"The semantic layer requires a 'metricflow_time_spine' model in the project, but none was found. "
"Guidance on creating this model can be found on our docs site ("
"https://docs.getdbt.com/docs/build/metricflow-time-spine) "
)
# For backward compatibility: if legacy time spine exists, include it in the manifest.
if legacy_time_spine_model:
legacy_time_spine = LegacyTimeSpine(
location=legacy_time_spine_model.relation_name,
# Create time_spine_table_config, set it in project_config, and add to semantic manifest
time_spine_table_config = PydanticTimeSpineTableConfiguration(
location=model.relation_name,
column_name="date_day",
grain=LEGACY_TIME_SPINE_GRANULARITY,
grain=TimeGranularity.DAY,
)
pydantic_semantic_manifest.project_configuration.time_spine_table_configurations = [
legacy_time_spine
time_spine_table_config
]
return pydantic_semantic_manifest

View File

@@ -4,6 +4,8 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Sequence, Union
from dbt_semantic_interfaces.type_enums import ConversionCalculationType
# trigger the PathEncoder
import dbt_common.helper_types # noqa:F401
from dbt import deprecations
@@ -21,7 +23,6 @@ from dbt.artifacts.resources import (
NodeVersion,
Owner,
Quoting,
TimeSpine,
UnitTestInputFixture,
UnitTestNodeVersions,
UnitTestOutputFixture,
@@ -38,10 +39,6 @@ from dbt_common.dataclass_schema import (
dbtClassMixin,
)
from dbt_common.exceptions import DbtInternalError
from dbt_semantic_interfaces.type_enums import (
ConversionCalculationType,
PeriodAggregation,
)
@dataclass
@@ -117,7 +114,6 @@ class HasColumnAndTestProps(HasColumnProps):
class UnparsedColumn(HasColumnAndTestProps):
quote: Optional[bool] = None
tags: List[str] = field(default_factory=list)
granularity: Optional[str] = None # str is really a TimeGranularity Enum
@dataclass
@@ -202,11 +198,6 @@ class UnparsedAnalysisUpdate(HasConfig, HasColumnDocs, HasColumnProps, HasYamlMe
access: Optional[str] = None
@dataclass
class UnparsedSingularTestUpdate(HasConfig, HasColumnProps, HasYamlMetadata):
pass
@dataclass
class UnparsedNodeUpdate(HasConfig, HasColumnTests, HasColumnAndTestProps, HasYamlMetadata):
quote_columns: Optional[bool] = None
@@ -220,7 +211,6 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
latest_version: Optional[NodeVersion] = None
versions: Sequence[UnparsedVersion] = field(default_factory=list)
deprecation_date: Optional[datetime.datetime] = None
time_spine: Optional[TimeSpine] = None
def __post_init__(self) -> None:
if self.latest_version:
@@ -242,41 +232,6 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
self.deprecation_date = normalize_date(self.deprecation_date)
if self.time_spine:
columns = (
self.get_columns_for_version(self.latest_version)
if self.latest_version
else self.columns
)
column_names_to_columns = {column.name: column for column in columns}
if self.time_spine.standard_granularity_column not in column_names_to_columns:
raise ParsingError(
f"Time spine standard granularity column must be defined on the model. Got invalid "
f"column name '{self.time_spine.standard_granularity_column}' for model '{self.name}'. Valid names"
f"{' for latest version' if self.latest_version else ''}: {list(column_names_to_columns.keys())}."
)
standard_column = column_names_to_columns[self.time_spine.standard_granularity_column]
if not standard_column.granularity:
raise ParsingError(
f"Time spine standard granularity column must have a granularity defined. Please add one for "
f"column '{self.time_spine.standard_granularity_column}' in model '{self.name}'."
)
custom_granularity_columns_not_found = []
for custom_granularity in self.time_spine.custom_granularities:
column_name = (
custom_granularity.column_name
if custom_granularity.column_name
else custom_granularity.name
)
if column_name not in column_names_to_columns:
custom_granularity_columns_not_found.append(column_name)
if custom_granularity_columns_not_found:
raise ParsingError(
"Time spine custom granularity columns do not exist in the model. "
f"Columns not found: {custom_granularity_columns_not_found}; "
f"Available columns: {list(column_names_to_columns.keys())}"
)
def get_columns_for_version(self, version: NodeVersion) -> List[UnparsedColumn]:
if version not in self._version_map:
raise DbtInternalError(
@@ -345,8 +300,6 @@ class UnparsedSourceDefinition(dbtClassMixin):
tables: List[UnparsedSourceTableDefinition] = field(default_factory=list)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
unrendered_database: Optional[str] = None
unrendered_schema: Optional[str] = None
@classmethod
def validate(cls, data):
@@ -579,13 +532,6 @@ class UnparsedConversionTypeParams(dbtClassMixin):
constant_properties: Optional[List[ConstantPropertyInput]] = None
@dataclass
class UnparsedCumulativeTypeParams(dbtClassMixin):
window: Optional[str] = None
grain_to_date: Optional[str] = None
period_agg: str = PeriodAggregation.FIRST.value
@dataclass
class UnparsedMetricTypeParams(dbtClassMixin):
measure: Optional[Union[UnparsedMetricInputMeasure, str]] = None
@@ -596,7 +542,6 @@ class UnparsedMetricTypeParams(dbtClassMixin):
grain_to_date: Optional[str] = None # str is really a TimeGranularity Enum
metrics: Optional[List[Union[UnparsedMetricInput, str]]] = None
conversion_type_params: Optional[UnparsedConversionTypeParams] = None
cumulative_type_params: Optional[UnparsedCumulativeTypeParams] = None
@dataclass
@@ -608,7 +553,6 @@ class UnparsedMetric(dbtClassMixin):
description: str = ""
# Note: `Union` must be the outermost part of the type annotation for serialization to work properly.
filter: Union[str, List[str], None] = None
time_granularity: Optional[str] = None
# metadata: Optional[UnparsedMetadata] = None  # TODO
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
@@ -720,8 +664,6 @@ class UnparsedQueryParams(dbtClassMixin):
group_by: List[str] = field(default_factory=list)
# Note: `Union` must be the outermost part of the type annotation for serialization to work properly.
where: Union[str, List[str], None] = None
order_by: List[str] = field(default_factory=list)
limit: Optional[int] = None
@dataclass

View File

@@ -5,6 +5,7 @@ from mashumaro.jsonschema.annotations import Pattern
from mashumaro.types import SerializableType
from typing_extensions import Annotated
from dbt import deprecations
from dbt.adapters.contracts.connection import QueryComment
from dbt.contracts.util import Identifier, list_str
from dbt_common.contracts.util import Mergeable
@@ -258,7 +259,6 @@ class Project(dbtClassMixin):
query_comment: Optional[Union[QueryComment, NoValue, str]] = field(default_factory=NoValue)
restrict_access: bool = False
dbt_cloud: Optional[Dict[str, Any]] = None
flags: Dict[str, Any] = field(default_factory=dict)
class Config(dbtMashConfig):
# These tell mashumaro to use aliases for jsonschema and for "from_dict"
@@ -312,6 +312,10 @@ class Project(dbtClassMixin):
raise ValidationError(
"Invalid project config: cannot have both 'tests' and 'data_tests' defined"
)
if "tests" in data:
deprecations.warn(
"project-test-config", deprecated_path="tests", exp_path="data_tests"
)
@dataclass
@@ -337,29 +341,17 @@ class ProjectFlags(ExtensibleDbtClassMixin):
warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None
write_json: Optional[bool] = None
# legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md
require_batched_execution_for_custom_microbatch_strategy: bool = False
# legacy behaviors
require_explicit_package_overrides_for_builtin_materializations: bool = True
require_resource_names_without_spaces: bool = False
source_freshness_run_project_hooks: bool = False
skip_nodes_if_on_run_start_fails: bool = False
state_modified_compare_more_unrendered_values: bool = False
state_modified_compare_vars: bool = False
require_yaml_configuration_for_mf_time_spines: bool = False
require_nested_cumulative_type_params: bool = False
@property
def project_only_flags(self) -> Dict[str, Any]:
return {
"require_batched_execution_for_custom_microbatch_strategy": self.require_batched_execution_for_custom_microbatch_strategy,
"require_explicit_package_overrides_for_builtin_materializations": self.require_explicit_package_overrides_for_builtin_materializations,
"require_resource_names_without_spaces": self.require_resource_names_without_spaces,
"source_freshness_run_project_hooks": self.source_freshness_run_project_hooks,
"skip_nodes_if_on_run_start_fails": self.skip_nodes_if_on_run_start_fails,
"state_modified_compare_more_unrendered_values": self.state_modified_compare_more_unrendered_values,
"state_modified_compare_vars": self.state_modified_compare_vars,
"require_yaml_configuration_for_mf_time_spines": self.require_yaml_configuration_for_mf_time_spines,
"require_nested_cumulative_type_params": self.require_nested_cumulative_type_params,
}

View File

@@ -7,9 +7,7 @@ from dbt.artifacts.schemas.base import VersionedSchema, schema_version
from dbt.artifacts.schemas.results import ExecutionResult, TimingInfo
from dbt.artifacts.schemas.run import RunExecutionResult, RunResult, RunResultsArtifact
from dbt.contracts.graph.nodes import ResultNode
from dbt.events.types import ArtifactWritten
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.events.functions import fire_event
TaskTags = Optional[Dict[str, Any]]
TaskID = uuid.UUID
@@ -31,8 +29,7 @@ class RemoteCompileResult(RemoteCompileResultMixin):
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def error(self) -> None:
# TODO: Can we delete this? It's never set anywhere else and never accessed
def error(self):
return None
@@ -43,7 +40,7 @@ class RemoteExecutionResult(ExecutionResult):
args: Dict[str, Any] = field(default_factory=dict)
generated_at: datetime = field(default_factory=datetime.utcnow)
def write(self, path: str) -> None:
def write(self, path: str):
writable = RunResultsArtifact.from_execution_results(
generated_at=self.generated_at,
results=self.results,
@@ -51,7 +48,6 @@ class RemoteExecutionResult(ExecutionResult):
args=self.args,
)
writable.write(path)
fire_event(ArtifactWritten(artifact_type=writable.__class__.__name__, artifact_path=path))
@classmethod
def from_local_result(

View File

@@ -1,9 +1,9 @@
import abc
from typing import Callable, ClassVar, Dict, List, Optional, Set
from typing import ClassVar, Dict, List, Optional, Set
import dbt.tracking
from dbt.events import types as core_types
from dbt_common.events.functions import warn_or_error
from dbt_common.events.functions import fire_event, warn_or_error
class DBTDeprecation:
@@ -98,10 +98,24 @@ class CollectFreshnessReturnSignature(DBTDeprecation):
_event = "CollectFreshnessReturnSignature"
class TestsConfigDeprecation(DBTDeprecation):
_name = "project-test-config"
_event = "TestsConfigDeprecation"
class ProjectFlagsMovedDeprecation(DBTDeprecation):
_name = "project-flags-moved"
_event = "ProjectFlagsMovedDeprecation"
def show(self, *args, **kwargs) -> None:
if self.name not in active_deprecations:
event = self.event(**kwargs)
# We can't do warn_or_error because the ProjectFlags
# is where that is set up and we're just reading it.
fire_event(event)
self.track_deprecation_warn()
active_deprecations.add(self.name)
class PackageMaterializationOverrideDeprecation(DBTDeprecation):
_name = "package-materialization-override"
@@ -118,21 +132,6 @@ class SourceFreshnessProjectHooksNotRun(DBTDeprecation):
_event = "SourceFreshnessProjectHooksNotRun"
class MFTimespineWithoutYamlConfigurationDeprecation(DBTDeprecation):
_name = "mf-timespine-without-yaml-configuration"
_event = "MFTimespineWithoutYamlConfigurationDeprecation"
class MFCumulativeTypeParamsDeprecation(DBTDeprecation):
_name = "mf-cumulative-type-params-deprecation"
_event = "MFCumulativeTypeParamsDeprecation"
class MicrobatchMacroOutsideOfBatchesDeprecation(DBTDeprecation):
_name = "microbatch-macro-outside-of-batches-deprecation"
_event = "MicrobatchMacroOutsideOfBatchesDeprecation"
def renamed_env_var(old_name: str, new_name: str):
class EnvironmentVariableRenamed(DBTDeprecation):
_name = f"environment-variable-renamed:{old_name}"
@@ -148,7 +147,7 @@ def renamed_env_var(old_name: str, new_name: str):
return cb
def warn(name: str, *args, **kwargs) -> None:
def warn(name, *args, **kwargs):
if name not in deprecations:
# this should (hopefully) never happen
raise RuntimeError("Error showing deprecation warning: {}".format(name))
@@ -156,13 +155,6 @@ def warn(name: str, *args, **kwargs) -> None:
deprecations[name].show(*args, **kwargs)
def buffer(name: str, *args, **kwargs):
def show_callback():
deprecations[name].show(*args, **kwargs)
buffered_deprecations.append(show_callback)
# these are globally available
# since modules are only imported once, active_deprecations is a singleton
@@ -177,24 +169,15 @@ deprecations_list: List[DBTDeprecation] = [
ConfigLogPathDeprecation(),
ConfigTargetPathDeprecation(),
CollectFreshnessReturnSignature(),
TestsConfigDeprecation(),
ProjectFlagsMovedDeprecation(),
PackageMaterializationOverrideDeprecation(),
ResourceNamesWithSpacesDeprecation(),
SourceFreshnessProjectHooksNotRun(),
MFTimespineWithoutYamlConfigurationDeprecation(),
MFCumulativeTypeParamsDeprecation(),
MicrobatchMacroOutsideOfBatchesDeprecation(),
]
deprecations: Dict[str, DBTDeprecation] = {d.name: d for d in deprecations_list}
buffered_deprecations: List[Callable] = []
def reset_deprecations():
active_deprecations.clear()
def fire_buffered_deprecations():
[dep_fn() for dep_fn in buffered_deprecations]
buffered_deprecations.clear()
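A hedged usage sketch of the buffering mechanism shown above: `buffer` queues the warning as a callback instead of firing it immediately, and `fire_buffered_deprecations` flushes the queue once and clears it (the kwargs follow the `project-test-config` usage seen elsewhere in this diff):

```python
from dbt import deprecations

# Queued as a callback; nothing is fired yet.
deprecations.buffer("project-test-config", deprecated_path="tests", exp_path="data_tests")

# Now the warning is shown (once per deprecation name, tracked via active_deprecations).
deprecations.fire_buffered_deprecations()
```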

View File

@@ -1,5 +1,5 @@
# Events Module
The Events module is responsible for communicating internal dbt structures into a consumable interface. Because the "event" classes are based entirely on protobuf definitions, the interface is really clearly defined, whether or not protobufs are used to consume it. We use protoc for compiling the protobuf message definitions into Python classes.
The Events module is responsible for communicating internal dbt structures into a consumable interface. Because the "event" classes are based entirely on protobuf definitions, the interface is really clearly defined, whether or not protobufs are used to consume it. We use Betterproto for compiling the protobuf message definitions into Python classes.
# Using the Events Module
The event module provides types that represent what is happening in dbt in `events.types`. These types are intended to represent an exhaustive list of all things happening within dbt that will need to be logged, streamed, or printed. To fire an event, `common.events.functions::fire_event` is the entry point to the module from everywhere in dbt.
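As a minimal sketch (assuming the `Note` event type from `dbt_common`, which carries a plain `msg` field):

```python
from dbt_common.events.functions import fire_event
from dbt_common.events.types import Note

fire_event(Note(msg="something worth logging"))
```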
@@ -8,7 +8,7 @@ The event module provides types that represent what is happening in dbt in `even
When events are processed via `fire_event`, nearly everything is logged. Whether or not the user has enabled the debug flag, all debug messages are still logged to the file. However, some events are particularly time-consuming to construct because they return a huge amount of data. Today, the only messages in this category are cache events, and they are only logged if the `--log-cache-events` flag is on. This is important because these messages should not be created unless they are going to be logged, since they cause a noticeable performance degradation. These events use the "fire_event_if" functions.
# Adding a New Event
* Add a new message in `core_types.proto`, and a second message with the same name + "Msg". The "Msg" message should have two fields, an "info" field of EventInfo, and a "data" field referring to the message name without "Msg"
* Add a new message in types.proto, and a second message with the same name + "Msg". The "Msg" message should have two fields, an "info" field of EventInfo, and a "data" field referring to the message name without "Msg"
* run the protoc compiler to update core_types_pb2.py: make core_proto_types
* Add a wrapping class in core/dbt/event/core_types.py with a Level superclass plus code and message methods
* Add the class to tests/unit/test_events.py
@@ -37,6 +37,6 @@ class PartialParsingDeletedExposure(DebugLevel):
## Compiling core_types.proto
After adding a new message in `core_types.proto`, either:
After adding a new message in `types.proto`, either:
- In the repository root directory: `make core_proto_types`
- In the `core/dbt/events` directory: `protoc -I=. --python_out=. types.proto`

View File

@@ -445,30 +445,6 @@ message SourceFreshnessProjectHooksNotRunMsg {
SourceFreshnessProjectHooksNotRun data = 2;
}
// D018
message MFTimespineWithoutYamlConfigurationDeprecation {}
message MFTimespineWithoutYamlConfigurationDeprecationMsg {
CoreEventInfo info = 1;
MFTimespineWithoutYamlConfigurationDeprecation data = 2;
}
// D019
message MFCumulativeTypeParamsDeprecation {}
message MFCumulativeTypeParamsDeprecationMsg {
CoreEventInfo info = 1;
MFCumulativeTypeParamsDeprecation data = 2;
}
// D020
message MicrobatchMacroOutsideOfBatchesDeprecation {}
message MicrobatchMacroOutsideOfBatchesDeprecationMsg {
CoreEventInfo info = 1;
MicrobatchMacroOutsideOfBatchesDeprecation data = 2;
}
// I065
message DeprecatedModel {
string model_name = 1;
@@ -917,17 +893,6 @@ message FreshnessConfigProblemMsg {
}
// I074
message MicrobatchModelNoEventTimeInputs {
string model_name = 1;
}
message MicrobatchModelNoEventTimeInputsMsg {
CoreEventInfo info = 1;
MicrobatchModelNoEventTimeInputs data = 2;
}
// M - Deps generation
@@ -1252,19 +1217,6 @@ message DepsScrubbedPackageNameMsg{
DepsScrubbedPackageName data = 2;
}
// P - Artifacts
// P001
message ArtifactWritten {
string artifact_type = 1;
string artifact_path = 2;
}
message ArtifactWrittenMsg {
CoreEventInfo info = 1;
ArtifactWritten data = 2;
}
// Q - Node execution
// Q001
@@ -1319,12 +1271,6 @@ message SQLRunnerExceptionMsg {
SQLRunnerException data = 2;
}
message Group {
string name = 1;
string package_name = 3;
map<string, string> owner = 7;
}
// Q007
message LogTestResult {
NodeInfo node_info = 1;
@@ -1334,8 +1280,6 @@ message LogTestResult {
int32 num_models = 5;
float execution_time = 6;
int32 num_failures = 7;
Group group = 8;
string attached_node = 9;
}
message LogTestResultMsg {
@@ -1368,7 +1312,6 @@ message LogModelResult {
int32 index = 4;
int32 total = 5;
float execution_time = 6;
Group group = 7;
}
message LogModelResultMsg {
@@ -1430,7 +1373,7 @@ message LogFreshnessResultMsg {
LogFreshnessResult data = 2;
}
// Q019
// Q018
message LogNodeNoOpResult {
NodeInfo node_info = 1;
string description = 2;
@@ -1646,7 +1589,6 @@ message ShowNode {
bool is_inline = 3;
string output_format = 4;
string unique_id = 5;
bool quiet = 6;
}
message ShowNodeMsg {
@@ -1661,7 +1603,6 @@ message CompiledNode {
bool is_inline = 3;
string output_format = 4;
string unique_id = 5;
bool quiet = 6;
}
message CompiledNodeMsg {
@@ -1669,27 +1610,6 @@ message CompiledNodeMsg {
CompiledNode data = 2;
}
// Q043
message SnapshotTimestampWarning {
string snapshot_time_data_type = 1;
string updated_at_data_type = 2;
}
message SnapshotTimestampWarningMsg {
CoreEventInfo info = 1;
SnapshotTimestampWarning data = 2;
}
// Q044
message MicrobatchExecutionDebug {
string msg = 1;
}
message MicrobatchExecutionDebugMsg {
CoreEventInfo info = 1;
MicrobatchExecutionDebug data = 2;
}
// W - Node testing
// Skipped W001
@@ -1895,7 +1815,6 @@ message RunResultWarning {
string node_name = 2;
string path = 3;
NodeInfo node_info = 4;
Group group = 5;
}
message RunResultWarningMsg {
@@ -1909,7 +1828,6 @@ message RunResultFailure {
string node_name = 2;
string path = 3;
NodeInfo node_info = 4;
Group group = 5;
}
message RunResultFailureMsg {
@@ -1931,7 +1849,6 @@ message StatsLineMsg {
message RunResultError {
string msg = 1;
NodeInfo node_info = 2;
Group group = 3;
}
message RunResultErrorMsg {
@@ -1979,7 +1896,6 @@ message EndOfRunSummary {
int32 num_errors = 1;
int32 num_warnings = 2;
bool keyboard_interrupt = 3;
int32 num_partial_success = 4;
}
message EndOfRunSummaryMsg {
@@ -1987,19 +1903,7 @@ message EndOfRunSummaryMsg {
EndOfRunSummary data = 2;
}
// Skipped Z031, Z032
// Z033
message MarkSkippedChildren {
string unique_id = 1;
string status = 2;
RunResultMsg run_result = 3;
}
message MarkSkippedChildrenMsg {
CoreEventInfo info = 1;
MarkSkippedChildren data = 2;
}
// Skipped Z031, Z032, Z033
// Z034
message LogSkipBecauseError {
@@ -2007,7 +1911,6 @@ message LogSkipBecauseError {
string relation = 2;
int32 index = 3;
int32 total = 4;
string status = 5;
}
message LogSkipBecauseErrorMsg {

File diff suppressed because one or more lines are too long

View File

@@ -2,10 +2,8 @@ import os
from functools import partial
from typing import Callable, List
from dbt.tracking import track_behavior_change_warn
from dbt_common.events.base_types import EventLevel, EventMsg
from dbt_common.events.event_manager_client import (
add_callback_to_manager,
add_logger_to_manager,
cleanup_event_logger,
get_event_manager,
@@ -70,14 +68,15 @@ def setup_event_logger(flags, callbacks: List[Callable[[EventMsg], None]] = [])
make_log_dir_if_missing(flags.LOG_PATH)
event_manager = get_event_manager()
event_manager.callbacks = callbacks.copy()
add_callback_to_manager(track_behavior_change_warn)
if flags.LOG_LEVEL != "none":
line_format = _line_format_from_str(flags.LOG_FORMAT, LineFormat.PlainText)
log_level = (
EventLevel.ERROR
if flags.QUIET
else EventLevel.DEBUG if flags.DEBUG else EventLevel(flags.LOG_LEVEL)
else EventLevel.DEBUG
if flags.DEBUG
else EventLevel(flags.LOG_LEVEL)
)
console_config = get_stdout_config(
line_format,

View File

@@ -388,9 +388,6 @@ class ConfigTargetPathDeprecation(WarnLevel):
return line_wrap_message(warning_tag(f"Deprecated functionality\n\n{description}"))
# Note: this deprecation has been removed, but we are leaving
# the event class here, because users may have specified it in
# warn_error_options.
class TestsConfigDeprecation(WarnLevel):
def code(self) -> str:
return "D012"
@@ -466,36 +463,6 @@ class SourceFreshnessProjectHooksNotRun(WarnLevel):
return line_wrap_message(warning_tag(description))
class MFTimespineWithoutYamlConfigurationDeprecation(WarnLevel):
def code(self) -> str:
return "D018"
def message(self) -> str:
description = "Time spines without YAML configuration are in the process of deprecation. Please add YAML configuration for your 'metricflow_time_spine' model. See documentation on MetricFlow time spines: https://docs.getdbt.com/docs/build/metricflow-time-spine and behavior change documentation: https://docs.getdbt.com/reference/global-configs/behavior-changes."
return line_wrap_message(warning_tag(description))
class MFCumulativeTypeParamsDeprecation(WarnLevel):
def code(self) -> str:
return "D019"
def message(self) -> str:
description = "Cumulative fields `type_params.window` and `type_params.grain_to_date` have been moved and will soon be deprecated. Please nest those values under `type_params.cumulative_type_params.window` and `type_params.cumulative_type_params.grain_to_date`. See documentation on behavior changes: https://docs.getdbt.com/reference/global-configs/behavior-changes."
return line_wrap_message(warning_tag(description))
class MicrobatchMacroOutsideOfBatchesDeprecation(WarnLevel):
def code(self) -> str:
return "D020"
def message(self) -> str:
description = "The use of a custom microbatch macro outside of batched execution is deprecated. To use it with batched execution, set `flags.require_batched_execution_for_custom_microbatch_strategy` to `True` in `dbt_project.yml`. In the future this will be the default behavior."
return line_wrap_message(warning_tag(description))
# =======================================================
# I - Project parsing
# =======================================================
@@ -954,19 +921,6 @@ class FreshnessConfigProblem(WarnLevel):
return self.msg
class MicrobatchModelNoEventTimeInputs(WarnLevel):
def code(self) -> str:
return "I074"
def message(self) -> str:
msg = (
f"The microbatch model '{self.model_name}' has no 'ref' or 'source' input with an 'event_time' configuration. "
"\nThis means no filtering can be applied and can result in unexpected duplicate records in the resulting microbatch model."
)
return warning_tag(msg)
# =======================================================
# M - Deps generation
# =======================================================
@@ -1230,19 +1184,6 @@ class DepsScrubbedPackageName(WarnLevel):
return f"Detected secret env var in {self.package_name}. dbt will write a scrubbed representation to the lock file. This will cause issues with subsequent 'dbt deps' using the lock file, requiring 'dbt deps --upgrade'"
# =======================================================
# P - Artifacts
# =======================================================
class ArtifactWritten(DebugLevel):
def code(self):
return "P001"
def message(self) -> str:
return f"Wrote artifact {self.artifact_type} to {self.artifact_path}"
# =======================================================
# Q - Node execution
# =======================================================
@@ -1352,9 +1293,6 @@ class LogModelResult(DynamicLevel):
if self.status == "error":
info = "ERROR creating"
status = red(self.status.upper())
elif "PARTIAL SUCCESS" in self.status:
info = "PARTIALLY created"
status = yellow(self.status.upper())
else:
info = "OK created"
status = green(self.status)
@@ -1572,20 +1510,10 @@ class LogHookEndLine(InfoLevel):
return "Q033"
def message(self) -> str:
if self.status == "success":
info = "OK"
status = green(info)
elif self.status == "skipped":
info = "SKIP"
status = yellow(info)
else:
info = "ERROR"
status = red(info)
msg = f"{info} hook: {self.statement}"
msg = f"OK hook: {self.statement}"
return format_fancy_output_line(
msg=msg,
status=status,
status=green(self.status),
index=self.index,
total=self.total,
execution_time=self.execution_time,
@@ -1663,9 +1591,7 @@ class ShowNode(InfoLevel):
{"node": self.node_name, "show": json.loads(self.preview)}, indent=2
)
else:
if self.quiet:
return self.preview
elif self.is_inline:
if self.is_inline:
return f"Previewing inline node:\n{self.preview}"
else:
return f"Previewing node '{self.node_name}':\n{self.preview}"
@@ -1682,34 +1608,12 @@ class CompiledNode(InfoLevel):
else:
return json.dumps({"node": self.node_name, "compiled": self.compiled}, indent=2)
else:
if self.quiet:
return self.compiled
elif self.is_inline:
if self.is_inline:
return f"Compiled inline node is:\n{self.compiled}"
else:
return f"Compiled node '{self.node_name}' is:\n{self.compiled}"
class SnapshotTimestampWarning(WarnLevel):
def code(self) -> str:
return "Q043"
def message(self) -> str:
return (
f"Data type of snapshot table timestamp columns ({self.snapshot_time_data_type}) "
f"doesn't match derived column 'updated_at' ({self.updated_at_data_type}). "
"Please update snapshot config 'updated_at'."
)
class MicrobatchExecutionDebug(DebugLevel):
def code(self) -> str:
return "Q044"
def message(self) -> str:
return self.msg
# =======================================================
# W - Node testing
# =======================================================
@@ -1941,16 +1845,10 @@ class EndOfRunSummary(InfoLevel):
def message(self) -> str:
error_plural = pluralize(self.num_errors, "error")
warn_plural = pluralize(self.num_warnings, "warning")
partial_success_plural = f"""{self.num_partial_success} partial {"success" if self.num_partial_success == 1 else "successes"}"""
if self.keyboard_interrupt:
message = yellow("Exited because of keyboard interrupt")
elif self.num_errors > 0:
message = red(
f"Completed with {error_plural}, {partial_success_plural}, and {warn_plural}:"
)
elif self.num_partial_success > 0:
message = yellow(f"Completed with {partial_success_plural} and {warn_plural}")
message = red(f"Completed with {error_plural} and {warn_plural}:")
elif self.num_warnings > 0:
message = yellow(f"Completed with {warn_plural}:")
else:
@@ -1958,21 +1856,7 @@ class EndOfRunSummary(InfoLevel):
return message
# Skipped Z031, Z032
class MarkSkippedChildren(DebugLevel):
def code(self) -> str:
return "Z033"
def message(self) -> str:
msg = (
f"Marking all children of '{self.unique_id}' to be skipped "
f"because of status '{self.status}'. "
)
if self.run_result.message:
msg = msg + f" Reason: {self.run_result.message}."
return msg
# Skipped Z031, Z032, Z033
class LogSkipBecauseError(ErrorLevel):
@@ -1980,7 +1864,7 @@ class LogSkipBecauseError(ErrorLevel):
return "Z034"
def message(self) -> str:
msg = f"SKIP relation {self.schema}.{self.relation} due to ephemeral model status '{self.status}'"
msg = f"SKIP relation {self.schema}.{self.relation} due to ephemeral model error"
return format_fancy_output_line(
msg=msg, status=red("ERROR SKIP"), index=self.index, total=self.total
)

View File

@@ -136,18 +136,6 @@ class GraphDependencyNotFoundError(CompilationError):
return msg
class ForeignKeyConstraintToSyntaxError(CompilationError):
def __init__(self, node, expression: str) -> None:
self.expression = expression
self.node = node
super().__init__(msg=self.get_message())
def get_message(self) -> str:
msg = f"'{self.node.unique_id}' defines a foreign key constraint 'to' expression which is not valid 'ref' or 'source' syntax: {self.expression}."
return msg
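For context, a hedged illustration of the `to` expressions this error is guarding against (the exact accepted grammar lives in the constraint-parsing code; these forms follow the error message above):

```python
# The `to` expression of a foreign key constraint must be 'ref' or 'source' syntax.
valid_to_expressions = [
    "ref('dim_customers')",
    "source('jaffle_shop', 'customers')",
]
invalid_to_expression = "analytics.dim_customers"  # a raw relation name would raise
                                                   # ForeignKeyConstraintToSyntaxError
```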
# client level exceptions

View File

@@ -68,7 +68,6 @@ def get_flag_dict():
"target_path",
"log_path",
"invocation_command",
"empty",
}
return {key: getattr(GLOBAL_FLAGS, key.upper(), None) for key in flag_attr}

View File

@@ -59,40 +59,18 @@ class Graph:
def select_children(
self, selected: Set[UniqueId], max_depth: Optional[int] = None
) -> Set[UniqueId]:
"""Returns all nodes which are descendants of the 'selected' set.
Nodes in the 'selected' set are counted as children only if
they are descendants of other nodes in the 'selected' set."""
children: Set[UniqueId] = set()
i = 0
while len(selected) > 0 and (max_depth is None or i < max_depth):
next_layer: Set[UniqueId] = set()
descendants: Set[UniqueId] = set()
for node in selected:
next_layer.update(self.descendants(node, 1))
next_layer = next_layer - children # Avoid re-searching
children.update(next_layer)
selected = next_layer
i += 1
return children
descendants.update(self.descendants(node, max_depth))
return descendants
def select_parents(
self, selected: Set[UniqueId], max_depth: Optional[int] = None
) -> Set[UniqueId]:
"""Returns all nodes which are ancestors of the 'selected' set.
Nodes in the 'selected' set are counted as parents only if
they are ancestors of other nodes in the 'selected' set."""
parents: Set[UniqueId] = set()
i = 0
while len(selected) > 0 and (max_depth is None or i < max_depth):
next_layer: Set[UniqueId] = set()
ancestors: Set[UniqueId] = set()
for node in selected:
next_layer.update(self.ancestors(node, 1))
next_layer = next_layer - parents # Avoid re-searching
parents.update(next_layer)
selected = next_layer
i += 1
return parents
ancestors.update(self.ancestors(node, max_depth))
return ancestors
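A standalone sketch (hypothetical helper, not dbt's `Graph` class) of the layer-by-layer expansion above: each iteration walks exactly one edge further, so `max_depth` bounds the number of hops, and already-found nodes are skipped to avoid re-searching:

```python
from typing import Optional, Set

import networkx as nx

def select_children(
    graph: nx.DiGraph, selected: Set[str], max_depth: Optional[int] = None
) -> Set[str]:
    children: Set[str] = set()
    depth = 0
    while selected and (max_depth is None or depth < max_depth):
        next_layer: Set[str] = set()
        for node in selected:
            next_layer.update(graph.successors(node))  # one hop down
        next_layer -= children  # avoid re-searching nodes already found
        children.update(next_layer)
        selected = next_layer
        depth += 1
    return children

g = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "d")])
print(select_children(g, {"a"}, max_depth=2))  # {'b', 'c'}
```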
def select_successors(self, selected: Set[UniqueId]) -> Set[UniqueId]:
successors: Set[UniqueId] = set()

View File

@@ -25,15 +25,8 @@ class GraphQueue:
the same time, as there is an unlocked race!
"""
def __init__(
self,
graph: nx.DiGraph,
manifest: Manifest,
selected: Set[UniqueId],
preserve_edges: bool = True,
) -> None:
# 'create_empty_copy' returns a copy of the graph G with all of the edges removed, and leaves nodes intact.
self.graph = graph if preserve_edges else nx.classes.function.create_empty_copy(graph)
def __init__(self, graph: nx.DiGraph, manifest: Manifest, selected: Set[UniqueId]) -> None:
self.graph = graph
self.manifest = manifest
self._selected = selected
# store the queue as a priority queue.
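A quick hedged illustration of what `create_empty_copy` does here: nodes survive, edges do not, which is what lets a queue built with `preserve_edges=False` ignore dependency ordering:

```python
import networkx as nx

g = nx.DiGraph([("model_a", "model_b")])
empty = nx.classes.function.create_empty_copy(g)
print(list(empty.nodes))  # ['model_a', 'model_b']
print(list(empty.edges))  # []
```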

View File

@@ -87,15 +87,12 @@ class NodeSelector(MethodManager):
)
return set(), set()
neighbors = self.collect_specified_neighbors(spec, collected)
selected = collected | neighbors
# if --indirect-selection EMPTY, do not expand to adjacent tests
if spec.indirect_selection == IndirectSelection.Empty:
return selected, set()
return collected, set()
else:
neighbors = self.collect_specified_neighbors(spec, collected)
direct_nodes, indirect_nodes = self.expand_selection(
selected=selected, indirect_selection=spec.indirect_selection
selected=(collected | neighbors), indirect_selection=spec.indirect_selection
)
return direct_nodes, indirect_nodes
@@ -173,22 +170,17 @@ class NodeSelector(MethodManager):
semantic_model = self.manifest.semantic_models[unique_id]
return semantic_model.config.enabled
elif unique_id in self.manifest.unit_tests:
unit_test = self.manifest.unit_tests[unique_id]
return unit_test.config.enabled
return True
elif unique_id in self.manifest.saved_queries:
saved_query = self.manifest.saved_queries[unique_id]
return saved_query.config.enabled
node = self.manifest.nodes[unique_id]
if self.include_empty_nodes:
return node.config.enabled
def _is_empty_node(self, unique_id: UniqueId) -> bool:
if unique_id in self.manifest.nodes:
node = self.manifest.nodes[unique_id]
return node.empty
else:
return False
return not node.empty and node.config.enabled
def node_is_match(self, node: GraphMemberNode) -> bool:
"""Determine if a node is a match for the selector. Non-match nodes
@@ -220,12 +212,7 @@ class NodeSelector(MethodManager):
"""Return the subset of selected nodes that is a match for this
selector.
"""
return {
unique_id
for unique_id in selected
if self._is_match(unique_id)
and (self.include_empty_nodes or not self._is_empty_node(unique_id))
}
return {unique_id for unique_id in selected if self._is_match(unique_id)}
def expand_selection(
self,
@@ -332,18 +319,18 @@ class NodeSelector(MethodManager):
return filtered_nodes
def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
"""Returns a queue over nodes in the graph that tracks progress of
dependencies.
dependecies.
"""
# Filtering happens in get_selected
# Filtering hapens in get_selected
selected_nodes = self.get_selected(spec)
# Save to global variable
selected_resources.set_selected_resources(selected_nodes)
# Construct a new graph using the selected_nodes
new_graph = self.full_graph.get_subset_graph(selected_nodes)
# should we give a way here for consumers to mutate the graph?
return GraphQueue(new_graph.graph, self.manifest, selected_nodes, preserve_edges)
return GraphQueue(new_graph.graph, self.manifest, selected_nodes)
class ResourceTypeSelector(NodeSelector):

View File

@@ -109,7 +109,7 @@ def is_selected_node(fqn: List[str], node_selector: str, is_versioned: bool) ->
SelectorTarget = Union[
SourceDefinition, ManifestNode, Exposure, Metric, SemanticModel, UnitTestDefinition, SavedQuery
SourceDefinition, ManifestNode, Exposure, Metric, SemanticModel, UnitTestDefinition
]
@@ -202,7 +202,6 @@ class SelectorMethod(metaclass=abc.ABCMeta):
self.metric_nodes(included_nodes),
self.unit_tests(included_nodes),
self.semantic_model_nodes(included_nodes),
self.saved_query_nodes(included_nodes),
)
def configurable_nodes(
@@ -681,8 +680,7 @@ class StateSelectorMethod(SelectorMethod):
self, old: Optional[SelectorTarget], new: SelectorTarget, adapter_type: str
) -> bool:
if isinstance(
new,
(SourceDefinition, Exposure, Metric, SemanticModel, UnitTestDefinition, SavedQuery),
new, (SourceDefinition, Exposure, Metric, SemanticModel, UnitTestDefinition)
):
# these all overwrite `same_contents`
different_contents = not new.same_contents(old) # type: ignore
@@ -721,9 +719,7 @@ class StateSelectorMethod(SelectorMethod):
) -> Callable[[Optional[SelectorTarget], SelectorTarget], bool]:
# get a function that compares two selector target based on compare method provided
def check_modified_contract(old: Optional[SelectorTarget], new: SelectorTarget) -> bool:
if new is None and hasattr(old, compare_method + "_removed"):
return getattr(old, compare_method + "_removed")()
elif hasattr(new, compare_method):
if hasattr(new, compare_method):
# when old body does not exist or old and new are not the same
return not old or not getattr(new, compare_method)(old, adapter_type) # type: ignore
else:
@@ -777,8 +773,6 @@ class StateSelectorMethod(SelectorMethod):
previous_node = SemanticModel.from_resource(manifest.semantic_models[unique_id])
elif unique_id in manifest.unit_tests:
previous_node = UnitTestDefinition.from_resource(manifest.unit_tests[unique_id])
elif unique_id in manifest.saved_queries:
previous_node = SavedQuery.from_resource(manifest.saved_queries[unique_id])
keyword_args = {}
if checker.__name__ in [
@@ -791,22 +785,6 @@ class StateSelectorMethod(SelectorMethod):
if checker(previous_node, node, **keyword_args): # type: ignore
yield unique_id
# checkers that can handle removed nodes
if checker.__name__ in ["check_modified_contract"]:
# ignore included_nodes, since those cannot contain removed nodes
for previous_unique_id, previous_node in manifest.nodes.items():
# detect removed (deleted, renamed, or disabled) nodes
removed_node = None
if previous_unique_id in self.manifest.disabled.keys():
removed_node = self.manifest.disabled[previous_unique_id][0]
elif previous_unique_id not in self.manifest.nodes.keys():
removed_node = previous_node
if removed_node:
# do not yield -- removed nodes should never be selected for downstream execution
# as they are not part of the current project's manifest.nodes
checker(removed_node, None, **keyword_args) # type: ignore
class ResultSelectorMethod(SelectorMethod):
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:

View File

@@ -1,223 +0,0 @@
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
import pytz
from dbt.artifacts.resources.types import BatchSize
from dbt.artifacts.schemas.batch_results import BatchType
from dbt.contracts.graph.nodes import ModelNode, NodeConfig
from dbt.exceptions import DbtInternalError, DbtRuntimeError
class MicrobatchBuilder:
"""A utility class for building microbatch definitions associated with a specific model"""
def __init__(
self,
model: ModelNode,
is_incremental: bool,
event_time_start: Optional[datetime],
event_time_end: Optional[datetime],
default_end_time: Optional[datetime] = None,
):
if model.config.incremental_strategy != "microbatch":
raise DbtInternalError(
f"Model '{model.name}' does not use 'microbatch' incremental_strategy."
)
self.model = model
if self.model.config.batch_size is None:
raise DbtRuntimeError(
f"Microbatch model '{self.model.name}' does not have a 'batch_size' config (one of {[batch_size.value for batch_size in BatchSize]}) specificed."
)
self.is_incremental = is_incremental
self.event_time_start = (
event_time_start.replace(tzinfo=pytz.UTC) if event_time_start else None
)
self.event_time_end = event_time_end.replace(tzinfo=pytz.UTC) if event_time_end else None
self.default_end_time = default_end_time or datetime.now(pytz.UTC)
def build_end_time(self):
"""Defaults the end_time to the current time in UTC unless a non `None` event_time_end was provided"""
end_time = self.event_time_end or self.default_end_time
return MicrobatchBuilder.ceiling_timestamp(end_time, self.model.config.batch_size)
def build_start_time(self, checkpoint: Optional[datetime]):
"""Create a start time based off the passed in checkpoint.
If the checkpoint is `None`, or this is the first run of a microbatch model, then the
model's configured `begin` value will be returned as a checkpoint is necessary
to build a start time. This is because we build the start time relative to the checkpoint
via the batchsize and offset, and we cannot offset a checkpoint if there is no checkpoint.
"""
assert isinstance(self.model.config, NodeConfig)
batch_size = self.model.config.batch_size
# Use event_time_start if it is provided.
if self.event_time_start:
return MicrobatchBuilder.truncate_timestamp(self.event_time_start, batch_size)
# First run, use model's configured 'begin' as start.
if not self.is_incremental or checkpoint is None:
if not self.model.config.begin:
raise DbtRuntimeError(
f"Microbatch model '{self.model.name}' requires a 'begin' configuration."
)
return MicrobatchBuilder.truncate_timestamp(self.model.config.begin, batch_size)
lookback = self.model.config.lookback
# If the checkpoint is equivalent to itself truncated, then the checkpoint straddles
# the batch line. In this case the last batch will end with the checkpoint, but its start
# should be the previous hour/day/month/year. Thus we need to increase the lookback by
# 1 to get this effect properly.
if checkpoint == MicrobatchBuilder.truncate_timestamp(checkpoint, batch_size):
lookback += 1
return MicrobatchBuilder.offset_timestamp(checkpoint, batch_size, -1 * lookback)
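A hedged, standalone illustration of the straddle rule above using plain datetimes and a day-sized batch (not the `MicrobatchBuilder` API):

```python
from datetime import datetime, timedelta

def truncate_day(ts: datetime) -> datetime:
    return ts.replace(hour=0, minute=0, second=0, microsecond=0)

lookback = 1
checkpoint = datetime(2024, 9, 18)  # falls exactly on the day boundary
if checkpoint == truncate_day(checkpoint):
    lookback += 1  # checkpoint straddles the batch line, so look back one extra day
start = truncate_day(checkpoint) - timedelta(days=lookback)
print(start)  # 2024-09-16 00:00:00 -- batches would cover the 16th, the 17th, and end at the checkpoint
```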
def build_batches(self, start: datetime, end: datetime) -> List[BatchType]:
"""
Given a start and end datetime, builds a list of batches where each batch is
the size of the model's batch_size.
"""
batch_size = self.model.config.batch_size
curr_batch_start: datetime = start
curr_batch_end: datetime = MicrobatchBuilder.offset_timestamp(
curr_batch_start, batch_size, 1
)
batches: List[BatchType] = [(curr_batch_start, curr_batch_end)]
while curr_batch_end < end:
curr_batch_start = curr_batch_end
curr_batch_end = MicrobatchBuilder.offset_timestamp(curr_batch_start, batch_size, 1)
batches.append((curr_batch_start, curr_batch_end))
# use exact end value as stop
batches[-1] = (batches[-1][0], end)
return batches
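A standalone sketch of the batch construction above with hour-sized batches (assuming an already-truncated start; the real code first aligns boundaries via `offset_timestamp`): each batch spans one `batch_size`, and the final batch is clipped to the exact `end`:

```python
from datetime import datetime, timedelta
from typing import List, Tuple

def build_hour_batches(start: datetime, end: datetime) -> List[Tuple[datetime, datetime]]:
    batches = [(start, start + timedelta(hours=1))]
    while batches[-1][1] < end:
        prev_end = batches[-1][1]
        batches.append((prev_end, prev_end + timedelta(hours=1)))
    batches[-1] = (batches[-1][0], end)  # use exact end value as stop
    return batches

for batch in build_hour_batches(datetime(2024, 9, 17, 6), datetime(2024, 9, 17, 8, 30)):
    print(batch)
# (06:00, 07:00), (07:00, 08:00), (08:00, 08:30)
```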
def build_jinja_context_for_batch(self, incremental_batch: bool) -> Dict[str, Any]:
"""
Create context with entries that reflect microbatch model + incremental execution state
Assumes self.model has been (re)-compiled with necessary batch filters applied.
"""
jinja_context: Dict[str, Any] = {}
# Microbatch model properties
jinja_context["model"] = self.model.to_dict()
jinja_context["sql"] = self.model.compiled_code
jinja_context["compiled_code"] = self.model.compiled_code
# Add incremental context variables for batches running incrementally
if incremental_batch:
jinja_context["is_incremental"] = lambda: True
jinja_context["should_full_refresh"] = lambda: False
return jinja_context
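
    # Example: the returned context always carries "model", "sql", and
    # "compiled_code"; when incremental_batch is True it additionally exposes
    # is_incremental() -> True and should_full_refresh() -> False.
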
@staticmethod
def offset_timestamp(timestamp: datetime, batch_size: BatchSize, offset: int) -> datetime:
"""Truncates the passed in timestamp based on the batch_size and then applies the offset by the batch_size.
Note: It's important to understand that the offset applies to the truncated timestamp, not
the origin timestamp. Thus being offset by a day isn't relative to the any given hour that day,
but relative to the start of the day. So if the timestamp is the very end of a day, 2024-09-17 23:59:59,
you have a batch size of a day, and an offset of +1, then the returned value ends up being only one
second later, 2024-09-18 00:00:00.
2024-09-17 16:06:00 + Batchsize.hour -1 -> 2024-09-17 15:00:00
2024-09-17 16:06:00 + Batchsize.hour +1 -> 2024-09-17 17:00:00
2024-09-17 16:06:00 + Batchsize.day -1 -> 2024-09-16 00:00:00
2024-09-17 16:06:00 + Batchsize.day +1 -> 2024-09-18 00:00:00
2024-09-17 16:06:00 + Batchsize.month -1 -> 2024-08-01 00:00:00
2024-09-17 16:06:00 + Batchsize.month +1 -> 2024-10-01 00:00:00
2024-09-17 16:06:00 + Batchsize.year -1 -> 2023-01-01 00:00:00
2024-09-17 16:06:00 + Batchsize.year +1 -> 2025-01-01 00:00:00
"""
truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size)
offset_timestamp: datetime
if batch_size == BatchSize.hour:
offset_timestamp = truncated + timedelta(hours=offset)
elif batch_size == BatchSize.day:
offset_timestamp = truncated + timedelta(days=offset)
elif batch_size == BatchSize.month:
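            # Months vary in length, so step backward one day at a time or forward
            # in 31-day jumps, re-truncating to the month start after each step;
            # the re-truncation guarantees each jump lands exactly one month away.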
offset_timestamp = truncated
for _ in range(abs(offset)):
if offset < 0:
offset_timestamp = offset_timestamp - timedelta(days=1)
else:
offset_timestamp = offset_timestamp + timedelta(days=31)
offset_timestamp = MicrobatchBuilder.truncate_timestamp(
offset_timestamp, batch_size
)
elif batch_size == BatchSize.year:
offset_timestamp = truncated.replace(year=truncated.year + offset)
return offset_timestamp

    @staticmethod
def truncate_timestamp(timestamp: datetime, batch_size: BatchSize) -> datetime:
"""Truncates the passed in timestamp based on the batch_size.
2024-09-17 16:06:00 + Batchsize.hour -> 2024-09-17 16:00:00
2024-09-17 16:06:00 + Batchsize.day -> 2024-09-17 00:00:00
2024-09-17 16:06:00 + Batchsize.month -> 2024-09-01 00:00:00
2024-09-17 16:06:00 + Batchsize.year -> 2024-01-01 00:00:00
"""
if batch_size == BatchSize.hour:
truncated = datetime(
timestamp.year,
timestamp.month,
timestamp.day,
timestamp.hour,
0,
0,
0,
pytz.utc,
)
elif batch_size == BatchSize.day:
truncated = datetime(
timestamp.year, timestamp.month, timestamp.day, 0, 0, 0, 0, pytz.utc
)
elif batch_size == BatchSize.month:
truncated = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0, 0, pytz.utc)
elif batch_size == BatchSize.year:
truncated = datetime(timestamp.year, 1, 1, 0, 0, 0, 0, pytz.utc)
return truncated

    @staticmethod
def batch_id(start_time: datetime, batch_size: BatchSize) -> str:
return MicrobatchBuilder.format_batch_start(start_time, batch_size).replace("-", "")

    @staticmethod
def format_batch_start(batch_start: datetime, batch_size: BatchSize) -> str:
return str(
batch_start.date() if (batch_start and batch_size != BatchSize.hour) else batch_start
)
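
    # Example: a daily batch starting 2024-09-17 formats as "2024-09-17" and has
    # batch_id "20240917"; an hourly batch keeps its full timestamp string.
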
@staticmethod
def ceiling_timestamp(timestamp: datetime, batch_size: BatchSize) -> datetime:
"""Takes the given timestamp and moves it to the ceiling for the given batch size
Note, if the timestamp is already the batch size ceiling, that is returned
2024-09-17 16:06:00 + BatchSize.hour -> 2024-09-17 17:00:00
2024-09-17 16:00:00 + BatchSize.hour -> 2024-09-17 16:00:00
2024-09-17 16:06:00 + BatchSize.day -> 2024-09-18 00:00:00
2024-09-17 00:00:00 + BatchSize.day -> 2024-09-17 00:00:00
2024-09-17 16:06:00 + BatchSize.month -> 2024-10-01 00:00:00
2024-09-01 00:00:00 + BatchSize.month -> 2024-09-01 00:00:00
2024-09-17 16:06:00 + BatchSize.year -> 2025-01-01 00:00:00
2024-01-01 00:00:00 + BatchSize.year -> 2024-01-01 00:00:00
"""
ceiling = truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size)
if truncated != timestamp:
ceiling = MicrobatchBuilder.offset_timestamp(truncated, batch_size, 1)
return ceiling
