forked from repo-mirrors/dbt-core
Compare commits
32 Commits
postgres-s... ... er/test-do

| Author | SHA1 | Date |
|---|---|---|
|  | d37a2e3d99 |  |
|  | a5bcc728da |  |
|  | 359a2c0cc5 |  |
|  | bbdb98fa5d |  |
|  | a8d4ba2b4a |  |
|  | 09e973d24a |  |
|  | 730e40a867 |  |
|  | a1e4753020 |  |
|  | 3ac20ce7a8 |  |
|  | aa23af98e5 |  |
|  | 46da967115 |  |
|  | db694731c9 |  |
|  | 7016cd3085 |  |
|  | 9ca10fbfd9 |  |
|  | 3308a4365e |  |
|  | f8bfd32ed6 |  |
|  | 3e437a6734 |  |
|  | 9e633f6178 |  |
|  | d182d06644 |  |
|  | 054c6fde37 |  |
|  | 4c326e40b5 |  |
|  | 8fe5ea1ee7 |  |
|  | 16f5023f4d |  |
|  | c6b8f7e595 |  |
|  | 77aeb3ea68 |  |
|  | 1e20772d33 |  |
|  | 8ce2c46a2f |  |
|  | aeaaedcaa1 |  |
|  | 6c111f2e31 |  |
|  | 139b9ac54f |  |
|  | cc8541c05f |  |
|  | ab500a9709 |  |
.changes/unreleased/Features-20240903-132428.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Enable `--resource-type` and `--exclude-resource-type` CLI flags and environment variables for `dbt test`
+time: 2024-09-03T13:24:28.592837+01:00
+custom:
+  Author: TowardOliver dbeatty10
+  Issue: "10656"

.changes/unreleased/Features-20240903-154133.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Allow configuring snapshot column names
+time: 2024-09-03T15:41:33.167097-04:00
+custom:
+  Author: gshank
+  Issue: "10185"

.changes/unreleased/Features-20240904-182320.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Add custom_granularities to YAML spec for time spines.
+time: 2024-09-04T18:23:20.234952-07:00
+custom:
+  Author: courtneyholcomb
+  Issue: "9265"

.changes/unreleased/Features-20240911-121029.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Add basic functionality for creating microbatch incremental models
+time: 2024-09-11T12:10:29.822189-05:00
+custom:
+  Author: MichelleArk QMalcolm
+  Issue: 9490 10635 10637 10638 10636 10662 10639

.changes/unreleased/Features-20240913-232111.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Execute microbatch models in batches
+time: 2024-09-13T23:21:11.935434-04:00
+custom:
+  Author: michelleark
+  Issue: "10700"

.changes/unreleased/Features-20240920-110447.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Allow snapshots to be defined in YAML.
+time: 2024-09-20T11:04:47.703117-04:00
+custom:
+  Author: peterallenwebb
+  Issue: "10246"

.changes/unreleased/Features-20240920-172419.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Write microbatch compiled/run targets to separate files, one per batch
+time: 2024-09-20T17:24:19.219556+01:00
+custom:
+  Author: michelleark
+  Issue: "10714"

.changes/unreleased/Features-20240923-155903.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Support required 'begin' config for microbatch models
+time: 2024-09-23T15:59:03.924079+01:00
+custom:
+  Author: michelleark
+  Issue: "10701"

.changes/unreleased/Features-20240924-152922.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Features
+body: Added the --inline-direct parameter to 'dbt show'
+time: 2024-09-24T15:29:22.874496-04:00
+custom:
+  Author: aranke peterallenwebb
+  Issue: "10770"

.changes/unreleased/Features-20240924-154639.yaml (new file)
@@ -0,0 +1,7 @@
+kind: Features
+body: 'Parse-time validation of microbatch configs: require event_time, batch_size,
+  lookback and validate input event_time'
+time: 2024-09-24T15:46:39.83112+01:00
+custom:
+  Author: michelleark
+  Issue: "10709"

.changes/unreleased/Fixes-20230801-094626.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Test case for `merge_exclude_columns`
+time: 2023-08-01T09:46:26.829362-06:00
+custom:
+  Author: dbeatty10
+  Issue: "8267"

.changes/unreleased/Fixes-20240917-174446.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix `--resource-type test` for `dbt list` and `dbt build`
+time: 2024-09-17T17:44:46.121032-06:00
+custom:
+  Author: dbeatty10
+  Issue: "10730"

.changes/unreleased/Fixes-20240922-133527.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix unit tests for incremental model with alias
+time: 2024-09-22T13:35:27.991398741Z
+custom:
+  Author: katsugeneration
+  Issue: "10754"

.changes/unreleased/Fixes-20240923-190758.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Allow singular tests to be documented in properties.yml
+time: 2024-09-23T19:07:58.151069+01:00
+custom:
+  Author: aranke
+  Issue: "9005"

.changes/unreleased/Fixes-20240923-202024.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Ignore --empty in unit test ref/source rendering
+time: 2024-09-23T20:20:24.151285+01:00
+custom:
+  Author: michelleark
+  Issue: "10516"

.changes/unreleased/Under the Hood-20240911-162730.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Add Snowplow tracking for behavior flag deprecations
+time: 2024-09-11T16:27:30.293832-04:00
+custom:
+  Author: mikealfare
+  Issue: "10552"

.changes/unreleased/Under the Hood-20240913-213312.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Add test utility patch_microbatch_end_time for adapters testing
+time: 2024-09-13T21:33:12.482336-04:00
+custom:
+  Author: michelleark
+  Issue: "10713"

.changes/unreleased/Under the Hood-20240916-102201.yaml (new file)
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Replace `TestSelector` with `ResourceTypeSelector`
+time: 2024-09-16T10:22:01.339462-06:00
+custom:
+  Author: dbeatty10
+  Issue: "10718"

.changes/unreleased/Under the Hood-20240918-170325.yaml (new file)
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Standardize returning `ResourceTypeSelector` instances in `dbt list` and `dbt
+  build`
+time: 2024-09-18T17:03:25.639516-06:00
+custom:
+  Author: dbeatty10
+  Issue: "10739"
.github/actions/setup-postgres-linux/action.yml (vendored)
@@ -5,6 +5,15 @@ runs:
   steps:
     - shell: bash
       run: |
-        sudo systemctl start postgresql.service
+        sudo apt-get --purge remove postgresql postgresql-*
+        sudo apt update -y
+        sudo apt install gnupg2 wget vim -y
+        sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+        curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc|sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg
+        sudo apt update -y
+        sudo apt install postgresql-16
+        sudo apt-get -y install postgresql postgresql-contrib
+        sudo systemctl start postgresql
+        sudo systemctl enable postgresql
         pg_isready
         sudo -u postgres bash ${{ github.action_path }}/setup_db.sh

@@ -5,7 +5,8 @@ runs:
   steps:
    - shell: bash
      run: |
-        brew services start postgresql
+        brew install postgresql@16
+        brew services start postgresql@16
        echo "Check PostgreSQL service is running"
        i=10
        COMMAND='pg_isready'

@@ -5,8 +5,22 @@ runs:
   steps:
    - shell: pwsh
      run: |
-        $pgService = Get-Service -Name postgresql*
+        Write-Host -Object "Installing PostgreSQL 16 as windows service..."
+        $installerArgs = @("--install_runtimes 0", "--superpassword root", "--enable_acledit 1", "--unattendedmodeui none", "--mode unattended")
+        $filePath = Invoke-DownloadWithRetry -Url "https://get.enterprisedb.com/postgresql/postgresql-16.1-1-windows-x64.exe" -Path "$env:PGROOT/postgresql-16.1-1-windows-x64.exe"
+        Start-Process -FilePath $filePath -ArgumentList $installerArgs -Wait -PassThru
+
+        Write-Host -Object "Validating PostgreSQL 16 Install..."
+        Get-Service -Name postgresql*
+        $pgReady = Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
+        $exitCode = $pgReady.ExitCode
+        if ($exitCode -ne 0) {
+          Write-Host -Object "PostgreSQL is not ready. Exitcode: $exitCode"
+          exit $exitCode
+        }
+
+        Write-Host -Object "Starting PostgreSQL 16 Service..."
+        $pgService = Get-Service -Name postgresql-x64-16
        Set-Service -InputObject $pgService -Status running -StartupType automatic
        Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
        $env:Path += ";$env:PGBIN"
        bash ${{ github.action_path }}/setup_db.sh
.github/workflows/docs-issue.yml (vendored)
@@ -36,6 +36,6 @@ jobs:
     uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main
     with:
       issue_repository: "dbt-labs/docs.getdbt.com"
-      issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
-      issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated."
+      issue_title: "[Core] Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}"
+      issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated.\n Originating from this issue: https://github.com/dbt-labs/dbt-core/issues/${{ github.event.issue.number }}"
     secrets: inherit
.github/workflows/release-branch-tests.yml (vendored)
@@ -25,7 +25,7 @@ permissions: read-all

 jobs:
   run_tests:
-    uses: dbt-labs/actions/.github/workflows/release-branch-tests.yml@main
+    uses: dbt-labs/actions/.github/workflows/release-branch-tests.yml@er/fix-fetch-latest
     with:
       workflows_to_run: '["main.yml"]'
     secrets: inherit
.github/workflows/release.yml (vendored)
@@ -222,7 +222,7 @@ jobs:
     permissions:
       packages: write

-    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main
+    uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@er/pin-container
     with:
       package: ${{ matrix.package }}
       version_number: ${{ inputs.version_number }}
.github/workflows/schema-check.yml (vendored)
@@ -30,7 +30,7 @@ env:

 jobs:
   checking-schemas:
-    name: "Checking schemas"
+    name: "Post-merge schema changes required"
     runs-on: ubuntu-latest

     steps:
@@ -46,9 +46,9 @@ These types of minor, non-breaking changes are tested by [tests/unit/artifacts/t

 #### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
 Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
+Note this must be done AFTER the core pull request is merged, otherwise we may end up with unresolvable conflicts and schemas that are invalid prior to base pull request merge. You may create the schemas.getdbt.com pull request prior to merging the base pull request, but do not merge until afterward.
 1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
 2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
 3. Observe the `Artifact Schema Check` CI check pass on the `dbt-core` PR that updates the artifact schemas, and merge the `dbt-core` PR!

 Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, `jsonschema` validation should still continue to work once the schemas are updated because they are forward-compatible and can therefore be used to validate previous minor versions of the schema.
@@ -68,3 +68,10 @@ class TimePeriod(StrEnum):

     def plural(self) -> str:
         return str(self) + "s"
+
+
+class BatchSize(StrEnum):
+    hour = "hour"
+    day = "day"
+    month = "month"
+    year = "year"
@@ -80,6 +80,9 @@ class NodeConfig(NodeAndTestConfig):
     # 'mergebehavior' dictionary
     materialized: str = "view"
     incremental_strategy: Optional[str] = None
+    batch_size: Any = None
+    lookback: Any = 0
+    begin: Any = None
     persist_docs: Dict[str, Any] = field(default_factory=dict)
     post_hook: List[Hook] = field(
         default_factory=list,
@@ -122,6 +125,7 @@ class NodeConfig(NodeAndTestConfig):
         default_factory=ContractConfig,
         metadata=MergeBehavior.Update.meta(),
     )
+    event_time: Any = None

     def __post_init__(self):
         # we validate that node_color has a suitable value to prevent dbt-docs from crashing
@@ -22,9 +22,16 @@ class ModelConfig(NodeConfig):
     )


+@dataclass
+class CustomGranularity(dbtClassMixin):
+    name: str
+    column_name: Optional[str] = None
+
+
 @dataclass
 class TimeSpine(dbtClassMixin):
     standard_granularity_column: str
+    custom_granularities: List[CustomGranularity] = field(default_factory=list)


 @dataclass
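To see how the new pieces fit together, here is a minimal sketch using simplified stand-ins for the dataclasses above (no `dbtClassMixin` base; field names otherwise as in the diff):

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class CustomGranularity:
    name: str
    column_name: Optional[str] = None  # falls back to `name` when omitted


@dataclass
class TimeSpine:
    standard_granularity_column: str
    custom_granularities: List[CustomGranularity] = field(default_factory=list)


spine = TimeSpine(
    standard_granularity_column="date_day",
    custom_granularities=[
        CustomGranularity(name="fiscal_quarter"),
        CustomGranularity(name="retail_week", column_name="retail_week_4_5_4"),
    ],
)
# Validation added later in this diff resolves each custom granularity to
# column_name (or name, when column_name is unset) and checks it exists.
resolved = [(g.column_name or g.name) for g in spine.custom_granularities]
assert resolved == ["fiscal_quarter", "retail_week_4_5_4"]
```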
@@ -1,10 +1,18 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Dict, List, Literal, Optional, Union

 from dbt.artifacts.resources.types import NodeType
 from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation
 from dbt.artifacts.resources.v1.config import NodeConfig
-from dbt_common.dataclass_schema import ValidationError
+from dbt_common.dataclass_schema import ValidationError, dbtClassMixin
+
+
+@dataclass
+class SnapshotMetaColumnNames(dbtClassMixin):
+    dbt_valid_to: Optional[str] = None
+    dbt_valid_from: Optional[str] = None
+    dbt_scd_id: Optional[str] = None
+    dbt_updated_at: Optional[str] = None


 @dataclass
@@ -17,6 +25,18 @@ class SnapshotConfig(NodeConfig):
     updated_at: Optional[str] = None
     # Not using Optional because of serialization issues with a Union of str and List[str]
     check_cols: Union[str, List[str], None] = None
+    snapshot_meta_column_names: SnapshotMetaColumnNames = field(
+        default_factory=SnapshotMetaColumnNames
+    )
+
+    @property
+    def snapshot_table_column_names(self):
+        return {
+            "dbt_valid_from": self.snapshot_meta_column_names.dbt_valid_from or "dbt_valid_from",
+            "dbt_valid_to": self.snapshot_meta_column_names.dbt_valid_to or "dbt_valid_to",
+            "dbt_scd_id": self.snapshot_meta_column_names.dbt_scd_id or "dbt_scd_id",
+            "dbt_updated_at": self.snapshot_meta_column_names.dbt_updated_at or "dbt_updated_at",
+        }

     def final_validate(self):
         if not self.strategy or not self.unique_key:
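For illustration, a runnable sketch (not part of the diff) of the fallback behavior that `snapshot_table_column_names` implements, using simplified stand-ins for the two dataclasses: any unset field falls back to dbt's historical default column name.

```python
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class SnapshotMetaColumnNames:
    dbt_valid_to: Optional[str] = None
    dbt_valid_from: Optional[str] = None
    dbt_scd_id: Optional[str] = None
    dbt_updated_at: Optional[str] = None


@dataclass
class SnapshotConfigSketch:
    snapshot_meta_column_names: SnapshotMetaColumnNames = field(
        default_factory=SnapshotMetaColumnNames
    )

    @property
    def snapshot_table_column_names(self):
        # Each configured name wins; otherwise the default is used.
        n = self.snapshot_meta_column_names
        return {
            "dbt_valid_from": n.dbt_valid_from or "dbt_valid_from",
            "dbt_valid_to": n.dbt_valid_to or "dbt_valid_to",
            "dbt_scd_id": n.dbt_scd_id or "dbt_scd_id",
            "dbt_updated_at": n.dbt_updated_at or "dbt_updated_at",
        }


config = SnapshotConfigSketch(SnapshotMetaColumnNames(dbt_valid_to="valid_until"))
assert config.snapshot_table_column_names["dbt_valid_to"] == "valid_until"
assert config.snapshot_table_column_names["dbt_scd_id"] == "dbt_scd_id"
```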
@@ -19,6 +19,7 @@ from dbt_common.exceptions import CompilationError
 @dataclass
 class SourceConfig(BaseConfig):
     enabled: bool = True
+    event_time: Any = None


 @dataclass
@@ -8,12 +8,15 @@ from click.exceptions import BadOptionUsage
 from click.exceptions import Exit as ClickExit
 from click.exceptions import NoSuchOption, UsageError

+from dbt.adapters.factory import register_adapter
 from dbt.artifacts.schemas.catalog import CatalogArtifact
 from dbt.artifacts.schemas.run import RunExecutionResult
 from dbt.cli import params as p
 from dbt.cli import requires
 from dbt.cli.exceptions import DbtInternalException, DbtUsageException
+from dbt.cli.requires import setup_manifest
 from dbt.contracts.graph.manifest import Manifest
+from dbt.mp_context import get_mp_context
 from dbt_common.events.base_types import EventMsg
@@ -165,6 +168,8 @@ def cli(ctx, **kwargs):
 @click.pass_context
 @global_flags
 @p.empty
+@p.event_time_start
+@p.event_time_end
 @p.exclude
 @p.export_saved_queries
 @p.full_refresh
@@ -352,6 +357,7 @@ def compile(ctx, **kwargs):
 @p.select
 @p.selector
 @p.inline
+@p.inline_direct
 @p.target_path
 @p.threads
 @p.vars
@@ -360,17 +366,26 @@ def compile(ctx, **kwargs):
 @requires.profile
 @requires.project
 @requires.runtime_config
-@requires.manifest
 def show(ctx, **kwargs):
     """Generates executable SQL for a named resource or inline query, runs that SQL, and returns a preview of the
     results. Does not materialize anything to the warehouse."""
-    from dbt.task.show import ShowTask
+    from dbt.task.show import ShowTask, ShowTaskDirect

-    task = ShowTask(
-        ctx.obj["flags"],
-        ctx.obj["runtime_config"],
-        ctx.obj["manifest"],
-    )
+    if ctx.obj["flags"].inline_direct:
+        # Issue the inline query directly, with no templating. Does not require
+        # loading the manifest.
+        register_adapter(ctx.obj["runtime_config"], get_mp_context())
+        task = ShowTaskDirect(
+            ctx.obj["flags"],
+            ctx.obj["runtime_config"],
+        )
+    else:
+        setup_manifest(ctx)
+        task = ShowTask(
+            ctx.obj["flags"],
+            ctx.obj["runtime_config"],
+            ctx.obj["manifest"],
+        )

     results = task.run()
     success = task.interpret_results(results)
@@ -537,6 +552,8 @@ def parse(ctx, **kwargs):
 @p.profiles_dir
 @p.project_dir
 @p.empty
+@p.event_time_start
+@p.event_time_end
 @p.select
 @p.selector
 @p.target_path
@@ -781,6 +798,8 @@ cli.commands["source"].add_command(snapshot_freshness, "snapshot-freshness")  #
 @click.pass_context
 @global_flags
 @p.exclude
+@p.resource_type
+@p.exclude_resource_type
 @p.profiles_dir
 @p.project_dir
 @p.select
@@ -91,6 +91,22 @@ empty = click.option(
     is_flag=True,
 )

+event_time_end = click.option(
+    "--event-time-end",
+    envvar="DBT_EVENT_TIME_END",
+    help="If specified, the end datetime dbt uses to filter microbatch model inputs (exclusive).",
+    type=click.DateTime(),
+    default=None,
+)
+
+event_time_start = click.option(
+    "--event-time-start",
+    envvar="DBT_EVENT_TIME_START",
+    help="If specified, the start datetime dbt uses to filter microbatch model inputs (inclusive).",
+    type=click.DateTime(),
+    default=None,
+)
+
 exclude = click.option(
     "--exclude",
     envvar=None,
@@ -471,6 +487,12 @@ inline = click.option(
     help="Pass SQL inline to dbt compile and show",
 )

+inline_direct = click.option(
+    "--inline-direct",
+    envvar=None,
+    help="Pass SQL inline to dbt show. Do not load the entire project or apply templating.",
+)
+
 # `--select` and `--models` are analogous for most commands except `dbt list` for legacy reasons.
 # Most CLI arguments should use the combined `select` option that aliases `--models` to `--select`.
 # However, if you need to split out these separators (like `dbt ls`), use the `models` and `raw_select` options instead.
@@ -324,28 +324,7 @@ def manifest(*args0, write=True, write_perf_info=False):
             ctx = args[0]
             assert isinstance(ctx, Context)

-            req_strs = ["profile", "project", "runtime_config"]
-            reqs = [ctx.obj.get(dep) for dep in req_strs]
-
-            if None in reqs:
-                raise DbtProjectError("profile, project, and runtime_config required for manifest")
-
-            runtime_config = ctx.obj["runtime_config"]
-
-            # if a manifest has already been set on the context, don't overwrite it
-            if ctx.obj.get("manifest") is None:
-                ctx.obj["manifest"] = parse_manifest(
-                    runtime_config, write_perf_info, write, ctx.obj["flags"].write_json
-                )
-            else:
-                register_adapter(runtime_config, get_mp_context())
-                adapter = get_adapter(runtime_config)
-                adapter.set_macro_context_generator(generate_runtime_macro_context)
-                adapter.set_macro_resolver(ctx.obj["manifest"])
-                query_header_context = generate_query_header_context(
-                    adapter.config, ctx.obj["manifest"]
-                )
-                adapter.connections.set_query_header(query_header_context)
+            setup_manifest(ctx, write=write, write_perf_info=write_perf_info)
             return func(*args, **kwargs)

         return update_wrapper(wrapper, func)
@@ -355,3 +334,27 @@ def manifest(*args0, write=True, write_perf_info=False):
     if len(args0) == 0:
         return outer_wrapper
     return outer_wrapper(args0[0])
+
+
+def setup_manifest(ctx: Context, write: bool = True, write_perf_info: bool = False):
+    """Load the manifest and add it to the context."""
+    req_strs = ["profile", "project", "runtime_config"]
+    reqs = [ctx.obj.get(dep) for dep in req_strs]
+
+    if None in reqs:
+        raise DbtProjectError("profile, project, and runtime_config required for manifest")
+
+    runtime_config = ctx.obj["runtime_config"]
+
+    # if a manifest has already been set on the context, don't overwrite it
+    if ctx.obj.get("manifest") is None:
+        ctx.obj["manifest"] = parse_manifest(
+            runtime_config, write_perf_info, write, ctx.obj["flags"].write_json
+        )
+    else:
+        register_adapter(runtime_config, get_mp_context())
+        adapter = get_adapter(runtime_config)
+        adapter.set_macro_context_generator(generate_runtime_macro_context)  # type: ignore[arg-type]
+        adapter.set_macro_resolver(ctx.obj["manifest"])
+        query_header_context = generate_query_header_context(adapter.config, ctx.obj["manifest"])  # type: ignore[attr-defined]
+        adapter.connections.set_query_header(query_header_context)
@@ -521,7 +521,9 @@ class Compiler:
         linker.write_graph(graph_path, manifest)

     # writes the "compiled_code" into the target/compiled directory
-    def _write_node(self, node: ManifestSQLNode) -> ManifestSQLNode:
+    def _write_node(
+        self, node: ManifestSQLNode, split_suffix: Optional[str] = None
+    ) -> ManifestSQLNode:
         if not node.extra_ctes_injected or node.resource_type in (
             NodeType.Snapshot,
             NodeType.Seed,
@@ -530,7 +532,9 @@ class Compiler:
         fire_event(WritingInjectedSQLForNode(node_info=get_node_info()))

         if node.compiled_code:
-            node.compiled_path = node.get_target_write_path(self.config.target_path, "compiled")
+            node.compiled_path = node.get_target_write_path(
+                self.config.target_path, "compiled", split_suffix
+            )
             node.write_node(self.config.project_root, node.compiled_path, node.compiled_code)
         return node

@@ -540,6 +544,7 @@ class Compiler:
         manifest: Manifest,
         extra_context: Optional[Dict[str, Any]] = None,
         write: bool = True,
+        split_suffix: Optional[str] = None,
     ) -> ManifestSQLNode:
         """This is the main entry point into this code. It's called by
         CompileRunner.compile, GenericRPCRunner.compile, and
@@ -562,7 +567,7 @@ class Compiler:

         node, _ = self._recursively_prepend_ctes(node, manifest, extra_context)
         if write:
-            self._write_node(node)
+            self._write_node(node, split_suffix=split_suffix)
         return node
@@ -158,14 +158,8 @@ def _parse_versions(versions: Union[List[str], str]) -> List[VersionSpecifier]:
     return [VersionSpecifier.from_version_string(v) for v in versions]


-def _all_source_paths(
-    model_paths: List[str],
-    seed_paths: List[str],
-    snapshot_paths: List[str],
-    analysis_paths: List[str],
-    macro_paths: List[str],
-) -> List[str]:
-    paths = chain(model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths)
+def _all_source_paths(*args: List[str]) -> List[str]:
+    paths = chain(*args)
     # Strip trailing slashes since the path is the same even though the name is not
     stripped_paths = map(lambda s: s.rstrip("/"), paths)
     return list(set(stripped_paths))
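The refactored helper now accepts any number of path lists; a quick, self-contained illustration of its dedup-and-strip semantics:

```python
from itertools import chain
from typing import List


def _all_source_paths(*args: List[str]) -> List[str]:
    # Same body as the refactored function above.
    paths = chain(*args)
    # Strip trailing slashes since the path is the same even though the name is not
    stripped_paths = map(lambda s: s.rstrip("/"), paths)
    return list(set(stripped_paths))


# "models/" and "models" collapse to a single entry; result order is unspecified.
assert sorted(_all_source_paths(["models/", "seeds"], ["models", "tests"])) == [
    "models",
    "seeds",
    "tests",
]
```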
@@ -409,7 +403,7 @@ class PartialProject(RenderComponents):
         snapshot_paths: List[str] = value_or(cfg.snapshot_paths, ["snapshots"])

         all_source_paths: List[str] = _all_source_paths(
-            model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths
+            model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths, test_paths
         )

         docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
@@ -652,6 +646,7 @@ class Project:
             self.snapshot_paths,
             self.analysis_paths,
             self.macro_paths,
+            self.test_paths,
         )

     @property
@@ -8,7 +8,7 @@ from dbt.config import IsFQNResource, Project, RuntimeConfig
 from dbt.contracts.graph.model_config import get_config_for
 from dbt.node_types import NodeType
 from dbt.utils import fqn_search
-from dbt_common.contracts.config.base import BaseConfig, _listify
+from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
 from dbt_common.exceptions import DbtInternalError
@@ -293,55 +293,7 @@ class ContextConfig:

     def add_config_call(self, opts: Dict[str, Any]) -> None:
         dct = self._config_call_dict
-        self._add_config_call(dct, opts)
-
-    @classmethod
-    def _add_config_call(cls, config_call_dict, opts: Dict[str, Any]) -> None:
-        # config_call_dict is already encountered configs, opts is new
-        # This mirrors code in _merge_field_value in model_config.py which is similar but
-        # operates on config objects.
-        for k, v in opts.items():
-            # MergeBehavior for post-hook and pre-hook is to collect all
-            # values, instead of overwriting
-            if k in BaseConfig.mergebehavior["append"]:
-                if not isinstance(v, list):
-                    v = [v]
-                if k in config_call_dict:  # should always be a list here
-                    config_call_dict[k].extend(v)
-                else:
-                    config_call_dict[k] = v
-
-            elif k in BaseConfig.mergebehavior["update"]:
-                if not isinstance(v, dict):
-                    raise DbtInternalError(f"expected dict, got {v}")
-                if k in config_call_dict and isinstance(config_call_dict[k], dict):
-                    config_call_dict[k].update(v)
-                else:
-                    config_call_dict[k] = v
-            elif k in BaseConfig.mergebehavior["dict_key_append"]:
-                if not isinstance(v, dict):
-                    raise DbtInternalError(f"expected dict, got {v}")
-                if k in config_call_dict:  # should always be a dict
-                    for key, value in v.items():
-                        extend = False
-                        # This might start with a +, to indicate we should extend the list
-                        # instead of just clobbering it
-                        if key.startswith("+"):
-                            extend = True
-                        if key in config_call_dict[k] and extend:
-                            # extend the list
-                            config_call_dict[k][key].extend(_listify(value))
-                        else:
-                            # clobber the list
-                            config_call_dict[k][key] = _listify(value)
-                else:
-                    # This is always a dictionary
-                    config_call_dict[k] = v
-                    # listify everything
-                    for key, value in config_call_dict[k].items():
-                        config_call_dict[k][key] = _listify(value)
-            else:
-                config_call_dict[k] = v
+        merge_config_dicts(dct, opts)

     def build_config_dict(
         self,
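The removed classmethod documented three merge behaviors — append, update, and dict_key_append — now handled by the shared `merge_config_dicts` helper in `dbt_common`. A simplified plain-dict illustration of those semantics, based only on the removed code (this is not the library call itself):

```python
# Simplified demonstration of the merge behaviors the removed code implemented
# for config calls; plain dict/list logic, no dbt imports.
existing = {"post-hook": ["grant select"], "meta": {"owner": "core"}}
incoming = {"post-hook": "analyze table", "meta": {"team": "platform"}, "alias": "fct"}

# append behavior (e.g. post-hook/pre-hook): collect values instead of overwriting
hooks = incoming["post-hook"]
existing["post-hook"].extend(hooks if isinstance(hooks, list) else [hooks])

# update behavior (e.g. meta): dictionary update
existing["meta"].update(incoming["meta"])

# default behavior: clobber
existing["alias"] = incoming["alias"]

assert existing == {
    "post-hook": ["grant select", "analyze table"],
    "meta": {"owner": "core", "team": "platform"},
    "alias": "fct",
}
```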
@@ -20,6 +20,7 @@ from typing_extensions import Protocol

 from dbt import selected_resources
 from dbt.adapters.base.column import Column
+from dbt.adapters.base.relation import EventTimeFilter
 from dbt.adapters.contracts.connection import AdapterResponse
 from dbt.adapters.exceptions import MissingConfigError
 from dbt.adapters.factory import (
@@ -27,7 +28,7 @@ from dbt.adapters.factory import (
     get_adapter_package_names,
     get_adapter_type_names,
 )
-from dbt.artifacts.resources import NodeVersion, RefArgs
+from dbt.artifacts.resources import NodeConfig, NodeVersion, RefArgs, SourceConfig
 from dbt.clients.jinja import (
     MacroGenerator,
     MacroStack,
@@ -50,6 +51,7 @@ from dbt.contracts.graph.nodes import (
     Exposure,
     Macro,
     ManifestNode,
+    ModelNode,
     Resource,
     SeedNode,
     SemanticModel,
@@ -76,6 +78,7 @@ from dbt.exceptions import (
     SecretEnvVarLocationError,
     TargetNotFoundError,
 )
+from dbt.materializations.incremental.microbatch import MicrobatchBuilder
 from dbt.node_types import ModelLanguage, NodeType
 from dbt.utils import MultiDict, args_to_dict
 from dbt_common.clients.jinja import MacroProtocol
@@ -230,6 +233,27 @@ class BaseResolver(metaclass=abc.ABCMeta):
     def resolve_limit(self) -> Optional[int]:
         return 0 if getattr(self.config.args, "EMPTY", False) else None

+    def resolve_event_time_filter(self, target: ManifestNode) -> Optional[EventTimeFilter]:
+        event_time_filter = None
+        if (
+            os.environ.get("DBT_EXPERIMENTAL_MICROBATCH")
+            and (isinstance(target.config, NodeConfig) or isinstance(target.config, SourceConfig))
+            and target.config.event_time
+            and self.model.config.materialized == "incremental"
+            and self.model.config.incremental_strategy == "microbatch"
+        ):
+            start = self.model.config.get("__dbt_internal_microbatch_event_time_start")
+            end = self.model.config.get("__dbt_internal_microbatch_event_time_end")
+
+            if start is not None or end is not None:
+                event_time_filter = EventTimeFilter(
+                    field_name=target.config.event_time,
+                    start=start,
+                    end=end,
+                )
+
+        return event_time_filter
+
     @abc.abstractmethod
     def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
         pass
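Downstream, adapters use the returned `EventTimeFilter` to constrain the relation's rendered SQL. A hypothetical sketch of the kind of predicate the `start`/`end` bounds enable — the actual rendering lives in the adapters, not in this diff, and the dataclass below is a stand-in mirroring only the fields used above:

```python
from dataclasses import dataclass
from datetime import datetime
from typing import Optional


@dataclass
class EventTimeFilterSketch:
    field_name: str
    start: Optional[datetime] = None
    end: Optional[datetime] = None

    def render(self) -> str:
        # Inclusive start, exclusive end — matching the CLI flag help text.
        clauses = []
        if self.start is not None:
            clauses.append(f"{self.field_name} >= '{self.start}'")
        if self.end is not None:
            clauses.append(f"{self.field_name} < '{self.end}'")
        return " and ".join(clauses)


f = EventTimeFilterSketch("event_at", datetime(2024, 9, 1), datetime(2024, 9, 2))
assert f.render() == "event_at >= '2024-09-01 00:00:00' and event_at < '2024-09-02 00:00:00'"
```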
@@ -545,7 +569,11 @@ class RuntimeRefResolver(BaseRefResolver):
     def create_relation(self, target_model: ManifestNode) -> RelationProxy:
         if target_model.is_ephemeral_model:
             self.model.set_cte(target_model.unique_id, None)
-            return self.Relation.create_ephemeral_from(target_model, limit=self.resolve_limit)
+            return self.Relation.create_ephemeral_from(
+                target_model,
+                limit=self.resolve_limit,
+                event_time_filter=self.resolve_event_time_filter(target_model),
+            )
         elif (
             hasattr(target_model, "defer_relation")
             and target_model.defer_relation
@@ -563,10 +591,18 @@ class RuntimeRefResolver(BaseRefResolver):
             )
         ):
             return self.Relation.create_from(
-                self.config, target_model.defer_relation, limit=self.resolve_limit
+                self.config,
+                target_model.defer_relation,
+                limit=self.resolve_limit,
+                event_time_filter=self.resolve_event_time_filter(target_model),
             )
         else:
-            return self.Relation.create_from(self.config, target_model, limit=self.resolve_limit)
+            return self.Relation.create_from(
+                self.config,
+                target_model,
+                limit=self.resolve_limit,
+                event_time_filter=self.resolve_event_time_filter(target_model),
+            )

     def validate(
         self,
@@ -600,6 +636,11 @@ class OperationRefResolver(RuntimeRefResolver):


 class RuntimeUnitTestRefResolver(RuntimeRefResolver):
+    @property
+    def resolve_limit(self) -> Optional[int]:
+        # Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
+        return None
+
     def resolve(
         self,
         target_name: str,
@@ -633,10 +674,20 @@ class RuntimeSourceResolver(BaseSourceResolver):
                 target_kind="source",
                 disabled=(isinstance(target_source, Disabled)),
             )
-        return self.Relation.create_from(self.config, target_source, limit=self.resolve_limit)
+        return self.Relation.create_from(
+            self.config,
+            target_source,
+            limit=self.resolve_limit,
+            event_time_filter=self.resolve_event_time_filter(target_source),
+        )


 class RuntimeUnitTestSourceResolver(BaseSourceResolver):
+    @property
+    def resolve_limit(self) -> Optional[int]:
+        # Unit tests should never respect --empty flag or provide a limit since they are based on fake data.
+        return None
+
     def resolve(self, source_name: str, table_name: str):
         target_source = self.manifest.resolve_source(
             source_name,
@@ -933,7 +984,20 @@ class ProviderContext(ManifestContext):
         # macros/source defs aren't 'writeable'.
         if isinstance(self.model, (Macro, SourceDefinition)):
             raise MacrosSourcesUnWriteableError(node=self.model)
-        self.model.build_path = self.model.get_target_write_path(self.config.target_path, "run")
+
+        split_suffix = None
+        if (
+            isinstance(self.model, ModelNode)
+            and self.model.config.get("incremental_strategy") == "microbatch"
+        ):
+            split_suffix = MicrobatchBuilder.format_batch_start(
+                self.model.config.get("__dbt_internal_microbatch_event_time_start"),
+                self.model.config.batch_size,
+            )
+
+        self.model.build_path = self.model.get_target_write_path(
+            self.config.target_path, "run", split_suffix=split_suffix
+        )
         self.model.write_node(self.config.project_root, self.model.build_path, payload)
         return ""

@@ -1597,7 +1661,7 @@ class UnitTestContext(ModelContext):
         if self.model.this_input_node_unique_id:
             this_node = self.manifest.expect(self.model.this_input_node_unique_id)
             self.model.set_cte(this_node.unique_id, None)  # type: ignore
-            return self.adapter.Relation.add_ephemeral_prefix(this_node.name)
+            return self.adapter.Relation.add_ephemeral_prefix(this_node.identifier)  # type: ignore
         return None
@@ -58,6 +58,7 @@ from dbt.contracts.graph.nodes import (
     SavedQuery,
     SeedNode,
     SemanticModel,
+    SingularTestNode,
     SourceDefinition,
     UnitTestDefinition,
     UnitTestFileFixture,
@@ -89,7 +90,7 @@ DocName = str
 RefName = str


-def find_unique_id_for_package(storage, key, package: Optional[PackageName]):
+def find_unique_id_for_package(storage, key, package: Optional[PackageName]) -> Optional[UniqueID]:
     if key not in storage:
         return None
@@ -470,6 +471,43 @@ class AnalysisLookup(RefableLookup):
     _versioned_types: ClassVar[set] = set()


+class SingularTestLookup(dbtClassMixin):
+    def __init__(self, manifest: "Manifest") -> None:
+        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
+        self.populate(manifest)
+
+    def get_unique_id(self, search_name, package: Optional[PackageName]) -> Optional[UniqueID]:
+        return find_unique_id_for_package(self.storage, search_name, package)
+
+    def find(
+        self, search_name, package: Optional[PackageName], manifest: "Manifest"
+    ) -> Optional[SingularTestNode]:
+        unique_id = self.get_unique_id(search_name, package)
+        if unique_id is not None:
+            return self.perform_lookup(unique_id, manifest)
+        return None
+
+    def add_singular_test(self, source: SingularTestNode) -> None:
+        if source.search_name not in self.storage:
+            self.storage[source.search_name] = {}
+
+        self.storage[source.search_name][source.package_name] = source.unique_id
+
+    def populate(self, manifest: "Manifest") -> None:
+        for node in manifest.nodes.values():
+            if isinstance(node, SingularTestNode):
+                self.add_singular_test(node)
+
+    def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> SingularTestNode:
+        if unique_id not in manifest.nodes:
+            raise dbt_common.exceptions.DbtInternalError(
+                f"Singular test {unique_id} found in cache but not found in manifest"
+            )
+        node = manifest.nodes[unique_id]
+        assert isinstance(node, SingularTestNode)
+        return node
+
+
 def _packages_to_search(
     current_project: str,
     node_package: str,
@@ -869,6 +907,9 @@ class Manifest(MacroMethods, dbtClassMixin):
     _analysis_lookup: Optional[AnalysisLookup] = field(
         default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
     )
+    _singular_test_lookup: Optional[SingularTestLookup] = field(
+        default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
+    )
     _parsing_info: ParsingInfo = field(
         default_factory=ParsingInfo,
         metadata={"serialize": lambda x: None, "deserialize": lambda x: None},
@@ -1264,6 +1305,12 @@ class Manifest(MacroMethods, dbtClassMixin):
             self._analysis_lookup = AnalysisLookup(self)
         return self._analysis_lookup

+    @property
+    def singular_test_lookup(self) -> SingularTestLookup:
+        if self._singular_test_lookup is None:
+            self._singular_test_lookup = SingularTestLookup(self)
+        return self._singular_test_lookup
+
     @property
     def external_node_unique_ids(self):
         return [node.unique_id for node in self.nodes.values() if node.is_external_node]
@@ -1708,6 +1755,7 @@ class Manifest(MacroMethods, dbtClassMixin):
             self._semantic_model_by_measure_lookup,
             self._disabled_lookup,
             self._analysis_lookup,
+            self._singular_test_lookup,
         )
         return self.__class__, args
@@ -2,6 +2,7 @@ import hashlib
 import os
 from dataclasses import dataclass, field
 from datetime import datetime
+from pathlib import Path
 from typing import (
     Any,
     Dict,
@@ -243,7 +244,9 @@ class NodeInfoMixin:

 @dataclass
 class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType):
-    def get_target_write_path(self, target_path: str, subdirectory: str):
+    def get_target_write_path(
+        self, target_path: str, subdirectory: str, split_suffix: Optional[str] = None
+    ):
         # This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory
         if os.path.basename(self.path) == os.path.basename(self.original_file_path):
             # One-to-one relationship of nodes to files.
@@ -251,6 +254,15 @@ class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType):
         else:
             # Many-to-one relationship of nodes to files.
             path = os.path.join(self.original_file_path, self.path)

+        if split_suffix:
+            pathlib_path = Path(path)
+            path = str(
+                pathlib_path.parent
+                / pathlib_path.stem
+                / (pathlib_path.stem + f"_{split_suffix}" + pathlib_path.suffix)
+            )
+
         target_write_path = os.path.join(target_path, subdirectory, self.package_name, path)
         return target_write_path
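A self-contained sketch of the path shaping added above: with a split suffix, a node that would write to `models/my_model.sql` instead lands in a per-batch file under a directory named for the node. The `target`/`run`/`my_package` segments below are stand-ins for the real target path, subdirectory, and package name; POSIX paths are assumed.

```python
import os
from pathlib import Path
from typing import Optional


def sketch_write_path(path: str, split_suffix: Optional[str] = None) -> str:
    # Mirrors the split_suffix branch added to get_target_write_path above.
    if split_suffix:
        pathlib_path = Path(path)
        path = str(
            pathlib_path.parent
            / pathlib_path.stem
            / (pathlib_path.stem + f"_{split_suffix}" + pathlib_path.suffix)
        )
    return os.path.join("target", "run", "my_package", path)


print(sketch_write_path("models/my_model.sql"))
# -> target/run/my_package/models/my_model.sql
print(sketch_write_path("models/my_model.sql", "2024-09-17"))
# -> target/run/my_package/models/my_model/my_model_2024-09-17.sql
```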
@@ -1627,6 +1639,11 @@ class ParsedMacroPatch(ParsedPatch):
     arguments: List[MacroArgument] = field(default_factory=list)


+@dataclass
+class ParsedSingularTestPatch(ParsedPatch):
+    pass
+
+
 # ====================================
 # Node unions/categories
 # ====================================
@@ -24,6 +24,7 @@ from dbt_semantic_interfaces.implementations.semantic_manifest import (
 from dbt_semantic_interfaces.implementations.semantic_model import PydanticSemanticModel
 from dbt_semantic_interfaces.implementations.time_spine import (
     PydanticTimeSpine,
+    PydanticTimeSpineCustomGranularityColumn,
     PydanticTimeSpinePrimaryColumn,
 )
 from dbt_semantic_interfaces.implementations.time_spine_table_configuration import (
@@ -105,6 +106,12 @@ class SemanticManifest:
                     name=time_spine.standard_granularity_column,
                     time_granularity=standard_granularity_column.granularity,
                 ),
+                custom_granularities=[
+                    PydanticTimeSpineCustomGranularityColumn(
+                        name=custom_granularity.name, column_name=custom_granularity.column_name
+                    )
+                    for custom_granularity in time_spine.custom_granularities
+                ],
             )
             pydantic_time_spines.append(pydantic_time_spine)
             if (
@@ -21,6 +21,7 @@ from dbt.artifacts.resources import (
     NodeVersion,
     Owner,
     Quoting,
+    TimeSpine,
     UnitTestInputFixture,
     UnitTestNodeVersions,
     UnitTestOutputFixture,
@@ -202,14 +203,14 @@ class UnparsedAnalysisUpdate(HasConfig, HasColumnDocs, HasColumnProps, HasYamlMetadata):


 @dataclass
-class UnparsedNodeUpdate(HasConfig, HasColumnTests, HasColumnAndTestProps, HasYamlMetadata):
-    quote_columns: Optional[bool] = None
-    access: Optional[str] = None
+class UnparsedSingularTestUpdate(HasConfig, HasColumnProps, HasYamlMetadata):
+    pass


 @dataclass
-class UnparsedTimeSpine(dbtClassMixin):
-    standard_granularity_column: str
+class UnparsedNodeUpdate(HasConfig, HasColumnTests, HasColumnAndTestProps, HasYamlMetadata):
+    quote_columns: Optional[bool] = None
+    access: Optional[str] = None


 @dataclass
@@ -219,7 +220,7 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
     latest_version: Optional[NodeVersion] = None
     versions: Sequence[UnparsedVersion] = field(default_factory=list)
     deprecation_date: Optional[datetime.datetime] = None
-    time_spine: Optional[UnparsedTimeSpine] = None
+    time_spine: Optional[TimeSpine] = None

     def __post_init__(self) -> None:
         if self.latest_version:
@@ -254,12 +255,27 @@ class UnparsedModelUpdate(UnparsedNodeUpdate):
                     f"column name '{self.time_spine.standard_granularity_column}' for model '{self.name}'. Valid names"
                     f"{' for latest version' if self.latest_version else ''}: {list(column_names_to_columns.keys())}."
                 )
-            column = column_names_to_columns[self.time_spine.standard_granularity_column]
-            if not column.granularity:
+            standard_column = column_names_to_columns[self.time_spine.standard_granularity_column]
+            if not standard_column.granularity:
                 raise ParsingError(
                     f"Time spine standard granularity column must have a granularity defined. Please add one for "
                     f"column '{self.time_spine.standard_granularity_column}' in model '{self.name}'."
                 )
+            custom_granularity_columns_not_found = []
+            for custom_granularity in self.time_spine.custom_granularities:
+                column_name = (
+                    custom_granularity.column_name
+                    if custom_granularity.column_name
+                    else custom_granularity.name
+                )
+                if column_name not in column_names_to_columns:
+                    custom_granularity_columns_not_found.append(column_name)
+            if custom_granularity_columns_not_found:
+                raise ParsingError(
+                    "Time spine custom granularity columns do not exist in the model. "
+                    f"Columns not found: {custom_granularity_columns_not_found}; "
+                    f"Available columns: {list(column_names_to_columns.keys())}"
+                )

     def get_columns_for_version(self, version: NodeVersion) -> List[UnparsedColumn]:
         if version not in self._version_map:
@@ -2,8 +2,10 @@ import os
 from functools import partial
 from typing import Callable, List

+from dbt.tracking import track_behavior_change_warn
 from dbt_common.events.base_types import EventLevel, EventMsg
 from dbt_common.events.event_manager_client import (
+    add_callback_to_manager,
     add_logger_to_manager,
     cleanup_event_logger,
     get_event_manager,
@@ -68,6 +70,7 @@ def setup_event_logger(flags, callbacks: List[Callable[[EventMsg], None]] = [])
     make_log_dir_if_missing(flags.LOG_PATH)
     event_manager = get_event_manager()
     event_manager.callbacks = callbacks.copy()
+    add_callback_to_manager(track_behavior_change_warn)

     if flags.LOG_LEVEL != "none":
         line_format = _line_format_from_str(flags.LOG_FORMAT, LineFormat.PlainText)
@@ -333,9 +333,9 @@ class NodeSelector(MethodManager):

     def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
         """Returns a queue over nodes in the graph that tracks progress of
-        dependecies.
+        dependencies.
         """
-        # Filtering hapens in get_selected
+        # Filtering happens in get_selected
         selected_nodes = self.get_selected(spec)
         # Save to global variable
         selected_resources.set_selected_resources(selected_nodes)
core/dbt/materializations/__init__.py (new file, empty)

core/dbt/materializations/incremental/__init__.py (new file, empty)

core/dbt/materializations/incremental/microbatch.py (new file, 174 lines)
@@ -0,0 +1,174 @@
+from datetime import datetime, timedelta
+from typing import List, Optional, Tuple
+
+import pytz
+
+from dbt.artifacts.resources.types import BatchSize
+from dbt.contracts.graph.nodes import ModelNode, NodeConfig
+from dbt.exceptions import DbtInternalError, DbtRuntimeError
+
+
+class MicrobatchBuilder:
+    """A utility class for building microbatch definitions associated with a specific model"""
+
+    def __init__(
+        self,
+        model: ModelNode,
+        is_incremental: bool,
+        event_time_start: Optional[datetime],
+        event_time_end: Optional[datetime],
+    ):
+        if model.config.incremental_strategy != "microbatch":
+            raise DbtInternalError(
+                f"Model '{model.name}' does not use 'microbatch' incremental_strategy."
+            )
+        self.model = model
+
+        if self.model.config.batch_size is None:
+            raise DbtRuntimeError(
+                f"Microbatch model '{self.model.name}' does not have a 'batch_size' config (one of {[batch_size.value for batch_size in BatchSize]}) specified."
+            )
+
+        self.is_incremental = is_incremental
+        self.event_time_start = (
+            event_time_start.replace(tzinfo=pytz.UTC) if event_time_start else None
+        )
+        self.event_time_end = event_time_end.replace(tzinfo=pytz.UTC) if event_time_end else None
+
+    def build_end_time(self):
+        """Defaults the end_time to the current time in UTC unless a non-`None` event_time_end was provided"""
+        return self.event_time_end or datetime.now(tz=pytz.utc)
+
+    def build_start_time(self, checkpoint: Optional[datetime]):
+        """Create a start time based off the passed-in checkpoint.
+
+        If the checkpoint is `None`, or this is the first run of a microbatch model, then the
+        model's configured `begin` value will be returned, as a checkpoint is necessary
+        to build a start time. This is because we build the start time relative to the checkpoint
+        via the batch_size and offset, and we cannot offset a checkpoint if there is no checkpoint.
+        """
+        assert isinstance(self.model.config, NodeConfig)
+        batch_size = self.model.config.batch_size
+
+        # Use event_time_start if it is provided.
+        if self.event_time_start:
+            return MicrobatchBuilder.truncate_timestamp(self.event_time_start, batch_size)
+
+        # First run, use model's configured 'begin' as start.
+        if not self.is_incremental or checkpoint is None:
+            if not self.model.config.begin:
+                raise DbtRuntimeError(
+                    f"Microbatch model '{self.model.name}' requires a 'begin' configuration."
+                )
+
+            return MicrobatchBuilder.truncate_timestamp(self.model.config.begin, batch_size)
+
+        lookback = self.model.config.lookback
+        start = MicrobatchBuilder.offset_timestamp(checkpoint, batch_size, -1 * lookback)
+
+        return start
+
+    def build_batches(self, start: datetime, end: datetime) -> List[Tuple[datetime, datetime]]:
+        """
+        Given a start and end datetime, builds a list of batches where each batch is
+        the size of the model's batch_size.
+        """
+        batch_size = self.model.config.batch_size
+        curr_batch_start: datetime = start
+        curr_batch_end: datetime = MicrobatchBuilder.offset_timestamp(
+            curr_batch_start, batch_size, 1
+        )
+
+        batches: List[Tuple[datetime, datetime]] = [(curr_batch_start, curr_batch_end)]
+        while curr_batch_end <= end:
+            curr_batch_start = curr_batch_end
+            curr_batch_end = MicrobatchBuilder.offset_timestamp(curr_batch_start, batch_size, 1)
+            batches.append((curr_batch_start, curr_batch_end))
+
+        # use exact end value as stop
+        batches[-1] = (batches[-1][0], end)
+
+        return batches
+
+    @staticmethod
+    def offset_timestamp(timestamp: datetime, batch_size: BatchSize, offset: int) -> datetime:
+        """Truncates the passed-in timestamp based on the batch_size and then applies the offset by the batch_size.
+
+        Note: It's important to understand that the offset applies to the truncated timestamp, not
+        the origin timestamp. Thus being offset by a day isn't relative to any given hour that day,
+        but relative to the start of the day. So if the timestamp is the very end of a day, 2024-09-17 23:59:59,
+        you have a batch size of a day, and an offset of +1, then the returned value ends up being only one
+        second later, 2024-09-18 00:00:00.
+
+        2024-09-17 16:06:00 + BatchSize.hour  -1 -> 2024-09-17 15:00:00
+        2024-09-17 16:06:00 + BatchSize.hour  +1 -> 2024-09-17 17:00:00
+        2024-09-17 16:06:00 + BatchSize.day   -1 -> 2024-09-16 00:00:00
+        2024-09-17 16:06:00 + BatchSize.day   +1 -> 2024-09-18 00:00:00
+        2024-09-17 16:06:00 + BatchSize.month -1 -> 2024-08-01 00:00:00
+        2024-09-17 16:06:00 + BatchSize.month +1 -> 2024-10-01 00:00:00
+        2024-09-17 16:06:00 + BatchSize.year  -1 -> 2023-01-01 00:00:00
+        2024-09-17 16:06:00 + BatchSize.year  +1 -> 2025-01-01 00:00:00
+        """
+        truncated = MicrobatchBuilder.truncate_timestamp(timestamp, batch_size)
+
+        offset_timestamp: datetime
+        if batch_size == BatchSize.hour:
+            offset_timestamp = truncated + timedelta(hours=offset)
+        elif batch_size == BatchSize.day:
+            offset_timestamp = truncated + timedelta(days=offset)
+        elif batch_size == BatchSize.month:
+            offset_timestamp = truncated
+            for _ in range(abs(offset)):
+                if offset < 0:
+                    offset_timestamp = offset_timestamp - timedelta(days=1)
+                else:
+                    offset_timestamp = offset_timestamp + timedelta(days=31)
+                offset_timestamp = MicrobatchBuilder.truncate_timestamp(
+                    offset_timestamp, batch_size
+                )
+        elif batch_size == BatchSize.year:
+            offset_timestamp = truncated.replace(year=truncated.year + offset)
+
+        return offset_timestamp
+
+    @staticmethod
+    def truncate_timestamp(timestamp: datetime, batch_size: BatchSize):
+        """Truncates the passed-in timestamp based on the batch_size.
+
+        2024-09-17 16:06:00 + BatchSize.hour  -> 2024-09-17 16:00:00
+        2024-09-17 16:06:00 + BatchSize.day   -> 2024-09-17 00:00:00
+        2024-09-17 16:06:00 + BatchSize.month -> 2024-09-01 00:00:00
+        2024-09-17 16:06:00 + BatchSize.year  -> 2024-01-01 00:00:00
+        """
+        if batch_size == BatchSize.hour:
+            truncated = datetime(
+                timestamp.year,
+                timestamp.month,
+                timestamp.day,
+                timestamp.hour,
+                0,
+                0,
+                0,
+                pytz.utc,
+            )
+        elif batch_size == BatchSize.day:
+            truncated = datetime(
+                timestamp.year, timestamp.month, timestamp.day, 0, 0, 0, 0, pytz.utc
+            )
+        elif batch_size == BatchSize.month:
+            truncated = datetime(timestamp.year, timestamp.month, 1, 0, 0, 0, 0, pytz.utc)
+        elif batch_size == BatchSize.year:
+            truncated = datetime(timestamp.year, 1, 1, 0, 0, 0, 0, pytz.utc)
+
+        return truncated
+
+    @staticmethod
+    def format_batch_start(
+        batch_start: Optional[datetime], batch_size: BatchSize
+    ) -> Optional[str]:
+        if batch_start is None:
+            return batch_start
+
+        return str(
+            batch_start.date() if (batch_start and batch_size != BatchSize.hour) else batch_start
+        )
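A short usage sketch exercising the static helpers of the new `MicrobatchBuilder` (import paths mirror the module added above; building a full instance would also need a `ModelNode`, so this sticks to the staticmethods):

```python
from datetime import datetime

import pytz

from dbt.artifacts.resources.types import BatchSize
from dbt.materializations.incremental.microbatch import MicrobatchBuilder

ts = datetime(2024, 9, 17, 16, 6, 0, 0, pytz.utc)

# Truncation snaps to the start of the batch period...
assert MicrobatchBuilder.truncate_timestamp(ts, BatchSize.day) == datetime(
    2024, 9, 17, 0, 0, 0, 0, pytz.utc
)
# ...and offsets apply to the truncated value, per the docstring above.
assert MicrobatchBuilder.offset_timestamp(ts, BatchSize.month, 1) == datetime(
    2024, 10, 1, 0, 0, 0, 0, pytz.utc
)

# Batch-start formatting (used as the per-batch file suffix) drops the time
# component for day/month/year batch sizes.
assert MicrobatchBuilder.format_batch_start(ts, BatchSize.day) == "2024-09-17"
assert MicrobatchBuilder.format_batch_start(ts, BatchSize.hour) == "2024-09-17 16:06:00+00:00"
```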
@@ -26,6 +26,11 @@ REFABLE_NODE_TYPES: List["NodeType"] = [
     NodeType.Snapshot,
 ]

+TEST_NODE_TYPES: List["NodeType"] = [
+    NodeType.Test,
+    NodeType.Unit,
+]
+
 VERSIONED_NODE_TYPES: List["NodeType"] = [
     NodeType.Model,
 ]
@@ -13,6 +13,7 @@ from dbt.contracts.graph.unparsed import (
     UnparsedMacroUpdate,
     UnparsedModelUpdate,
     UnparsedNodeUpdate,
+    UnparsedSingularTestUpdate,
 )
 from dbt.exceptions import ParsingError
 from dbt.parser.search import FileBlock
@@ -38,6 +39,7 @@ Target = TypeVar(
     UnpatchedSourceDefinition,
     UnparsedExposure,
     UnparsedModelUpdate,
+    UnparsedSingularTestUpdate,
 )
@@ -24,6 +24,7 @@ from dbt.adapters.factory import (
     register_adapter,
 )
 from dbt.artifacts.resources import FileHash, NodeRelation, NodeVersion
+from dbt.artifacts.resources.types import BatchSize
 from dbt.artifacts.schemas.base import Writable
 from dbt.clients.jinja import MacroStack, get_rendered
 from dbt.clients.jinja_static import statically_extract_macro_calls
@@ -468,6 +469,7 @@ class ManifestLoader:
         self.check_valid_group_config()
         self.check_valid_access_property()
         self.check_valid_snapshot_config()
+        self.check_valid_microbatch_config()

         semantic_manifest = SemanticManifest(self.manifest)
         if not semantic_manifest.validate():
@@ -1355,6 +1357,69 @@ class ManifestLoader:
|
||||
continue
|
||||
node.config.final_validate()
|
||||
|
||||
    def check_valid_microbatch_config(self):
        if os.environ.get("DBT_EXPERIMENTAL_MICROBATCH"):
            for node in self.manifest.nodes.values():
                if (
                    node.config.materialized == "incremental"
                    and node.config.incremental_strategy == "microbatch"
                ):
                    # Required configs: event_time, batch_size, begin
                    event_time = node.config.event_time
                    if event_time is None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide an 'event_time' (string) config that indicates the name of the event time column."
                        )
                    if not isinstance(event_time, str):
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide an 'event_time' config of type string, but got: {type(event_time)}."
                        )

                    begin = node.config.begin
                    if begin is None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'begin' (datetime) config that indicates the earliest timestamp the microbatch model should be built from."
                        )

                    # Try to cast 'begin' to a datetime using the same format as mashumaro for consistency with other yaml-provided datetimes
                    # Mashumaro default: https://github.com/Fatal1ty/mashumaro/blob/4ac16fd060a6c651053475597b58b48f958e8c5c/README.md?plain=1#L1186
                    if isinstance(begin, str):
                        try:
                            begin = datetime.datetime.fromisoformat(begin)
                            node.config.begin = begin
                        except Exception:
                            raise dbt.exceptions.ParsingError(
                                f"Microbatch model '{node.name}' must provide a 'begin' config of valid datetime (ISO format), but got: {begin}."
                            )

                    if not isinstance(begin, datetime.datetime):
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'begin' config of type datetime, but got: {type(begin)}."
                        )

                    batch_size = node.config.batch_size
                    valid_batch_sizes = [size.value for size in BatchSize]
                    if batch_size not in valid_batch_sizes:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'batch_size' config that is one of {valid_batch_sizes}, but got: {batch_size}."
                        )

                    # Optional config: lookback (int)
                    lookback = node.config.lookback
                    if not isinstance(lookback, int) and lookback is not None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide the optional 'lookback' config as type int, but got: {type(lookback)}."
                        )

                    # Validate upstream node event_time (if configured)
                    for input_unique_id in node.depends_on.nodes:
                        input_node = self.manifest.expect(unique_id=input_unique_id)
                        input_event_time = input_node.config.event_time
                        if input_event_time and not isinstance(input_event_time, str):
                            raise dbt.exceptions.ParsingError(
                                f"Microbatch model '{node.name}' depends on an input node '{input_node.name}' with an 'event_time' config of invalid (non-string) type: {type(input_event_time)}."
                            )
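
Note that 'begin' often reaches this validator as an ISO-8601 string (from YAML or Jinja) and is normalized with datetime.fromisoformat before the final type check. A minimal sketch of that normalization step:

    import datetime

    begin = "2020-01-01T00:00:00"  # as it might arrive from a YAML config
    if isinstance(begin, str):
        begin = datetime.datetime.fromisoformat(begin)
    assert begin == datetime.datetime(2020, 1, 1, 0, 0, 0)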

    def write_perf_info(self, target_path: str):
        path = os.path.join(target_path, PERF_INFO_FILE_NAME)
        write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))

@@ -22,6 +22,7 @@ from dbt.flags import get_flags
from dbt.node_types import ModelLanguage, NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FileBlock
from dbt_common.contracts.config.base import merge_config_dicts
from dbt_common.dataclass_schema import ValidationError
from dbt_common.exceptions.macros import UndefinedMacroError
from dbt_extractor import ExtractionError, py_extract_from_source  # type: ignore
@@ -467,7 +468,7 @@ def _get_config_call_dict(static_parser_result: Dict[str, Any]) -> Dict[str, Any]:
    config_call_dict: Dict[str, Any] = {}

    for c in static_parser_result["configs"]:
        ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
        merge_config_dicts(config_call_dict, {c[0]: c[1]})

    return config_call_dict

@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Type, TypeVar

from dbt.artifacts.resources import RefArgs
from dbt.artifacts.resources.v1.model import TimeSpine
from dbt.artifacts.resources.v1.model import CustomGranularity, TimeSpine
from dbt.clients.jinja_static import statically_parse_ref_or_source
from dbt.clients.yaml_helper import load_yaml_text
from dbt.config import RuntimeConfig
@@ -17,6 +17,7 @@ from dbt.contracts.graph.nodes import (
    ModelNode,
    ParsedMacroPatch,
    ParsedNodePatch,
    ParsedSingularTestPatch,
    UnpatchedSourceDefinition,
)
from dbt.contracts.graph.unparsed import (
@@ -27,6 +28,7 @@ from dbt.contracts.graph.unparsed import (
    UnparsedMacroUpdate,
    UnparsedModelUpdate,
    UnparsedNodeUpdate,
    UnparsedSingularTestUpdate,
    UnparsedSourceDefinition,
)
from dbt.events.types import (
@@ -205,6 +207,7 @@ class SchemaParser(SimpleParser[YamlBlock, ModelNode]):

        # PatchParser.parse()
        if "snapshots" in dct:
            self._add_yaml_snapshot_nodes_to_manifest(dct["snapshots"], block)
            snapshot_parse_result = TestablePatchParser(self, yaml_block, "snapshots").parse()
            for test_block in snapshot_parse_result.test_blocks:
                self.generic_test_parser.parse_tests(test_block)
@@ -221,6 +224,10 @@ class SchemaParser(SimpleParser[YamlBlock, ModelNode]):
            parser = MacroPatchParser(self, yaml_block, "macros")
            parser.parse()

        if "data_tests" in dct:
            parser = SingularTestPatchParser(self, yaml_block, "data_tests")
            parser.parse()

        # PatchParser.parse() (but never test_blocks)
        if "analyses" in dct:
            parser = AnalysisPatchParser(self, yaml_block, "analyses")
@@ -265,8 +272,59 @@ class SchemaParser(SimpleParser[YamlBlock, ModelNode]):
            saved_query_parser = SavedQueryParser(self, yaml_block)
            saved_query_parser.parse()

    def _add_yaml_snapshot_nodes_to_manifest(
        self, snapshots: List[Dict[str, Any]], block: FileBlock
    ) -> None:
        """We support the creation of simple snapshots in yaml, without an
        accompanying SQL definition. For such snapshots, the user must supply
        a 'relation' property to indicate the target of the snapshot. This
        function looks for such snapshots and adds a node to the manifest for
        each one we find, since they were not added during SQL parsing."""

        rebuild_refs = False
        for snapshot in snapshots:
            if "relation" in snapshot:
                from dbt.parser import SnapshotParser

                if "name" not in snapshot:
                    raise ParsingError("A snapshot must define the 'name' property.")

                # Reuse the logic of SnapshotParser as far as possible to create
                # a new node we can add to the manifest.
                parser = SnapshotParser(self.project, self.manifest, self.root_project)
                fqn = parser.get_fqn_prefix(block.path.relative_path)
                fqn.append(snapshot["name"])
                snapshot_node = parser._create_parsetime_node(
                    block,
                    self.get_compiled_path(block),
                    parser.initial_config(fqn),
                    fqn,
                    snapshot["name"],
                )

                # Parse the expected ref() or source() expression given by
                # 'relation' so that we know what we are snapshotting.
                source_or_ref = statically_parse_ref_or_source(snapshot["relation"])
                if isinstance(source_or_ref, RefArgs):
                    snapshot_node.refs.append(source_or_ref)
                else:
                    snapshot_node.sources.append(source_or_ref)

                # Implement the snapshot SQL as a simple select *
                snapshot_node.raw_code = "select * from {{ " + snapshot["relation"] + " }}"

                # Add our new node to the manifest, and note that ref lookup collections
                # will need to be rebuilt.
                self.manifest.add_node_nofile(snapshot_node)
                rebuild_refs = True

        if rebuild_refs:
            self.manifest.rebuild_ref_lookup()
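
For context, a hedged sketch of the YAML-only snapshot this method handles (the snapshot and column names here are hypothetical):

    snapshots:
      - name: orders_snapshot
        relation: ref('orders')
        config:
          unique_key: id
          strategy: timestamp
          updated_at: updated_at

The 'relation' value becomes a ref()/source() dependency, and the generated raw code is simply select * from {{ ref('orders') }}.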


Parsed = TypeVar("Parsed", UnpatchedSourceDefinition, ParsedNodePatch, ParsedMacroPatch)
Parsed = TypeVar(
    "Parsed", UnpatchedSourceDefinition, ParsedNodePatch, ParsedMacroPatch, ParsedSingularTestPatch
)
NodeTarget = TypeVar("NodeTarget", UnparsedNodeUpdate, UnparsedAnalysisUpdate, UnparsedModelUpdate)
NonSourceTarget = TypeVar(
    "NonSourceTarget",
@@ -274,6 +332,7 @@ NonSourceTarget = TypeVar(
    UnparsedAnalysisUpdate,
    UnparsedMacroUpdate,
    UnparsedModelUpdate,
    UnparsedSingularTestUpdate,
)

@@ -620,7 +679,14 @@ class NodePatchParser(PatchParser[NodeTarget, ParsedNodePatch], Generic[NodeTarget]):
        deprecation_date = block.target.deprecation_date
        time_spine = (
            TimeSpine(
                standard_granularity_column=block.target.time_spine.standard_granularity_column
                standard_granularity_column=block.target.time_spine.standard_granularity_column,
                custom_granularities=[
                    CustomGranularity(
                        name=custom_granularity.name,
                        column_name=custom_granularity.column_name,
                    )
                    for custom_granularity in block.target.time_spine.custom_granularities
                ],
            )
            if block.target.time_spine
            else None
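
A hedged sketch of the YAML this hunk parses into TimeSpine/CustomGranularity (model and column names are illustrative; the field names match the CustomGranularity schema further below):

    models:
      - name: metricflow_time_spine
        time_spine:
          standard_granularity_column: date_day
          custom_granularities:
            - name: retail_month
              column_name: retail_month_start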
@@ -1048,6 +1114,55 @@ class AnalysisPatchParser(NodePatchParser[UnparsedAnalysisUpdate]):
        return UnparsedAnalysisUpdate

class SingularTestPatchParser(PatchParser[UnparsedSingularTestUpdate, ParsedSingularTestPatch]):
    def get_block(self, node: UnparsedSingularTestUpdate) -> TargetBlock:
        return TargetBlock.from_yaml_block(self.yaml, node)

    def _target_type(self) -> Type[UnparsedSingularTestUpdate]:
        return UnparsedSingularTestUpdate

    def parse_patch(self, block: TargetBlock[UnparsedSingularTestUpdate], refs: ParserRef) -> None:
        patch = ParsedSingularTestPatch(
            name=block.target.name,
            description=block.target.description,
            meta=block.target.meta,
            docs=block.target.docs,
            config=block.target.config,
            original_file_path=block.target.original_file_path,
            yaml_key=block.target.yaml_key,
            package_name=block.target.package_name,
        )

        assert isinstance(self.yaml.file, SchemaSourceFile)
        source_file: SchemaSourceFile = self.yaml.file

        unique_id = self.manifest.singular_test_lookup.get_unique_id(
            block.name, block.target.package_name
        )
        if not unique_id:
            warn_or_error(
                NoNodeForYamlKey(
                    patch_name=patch.name,
                    yaml_key=patch.yaml_key,
                    file_path=source_file.path.original_file_path,
                )
            )
            return

        node = self.manifest.nodes.get(unique_id)
        assert node is not None

        source_file.append_patch(patch.yaml_key, unique_id)
        if patch.config:
            self.patch_node_config(node, patch)

        node.patch_path = patch.file_id
        node.description = patch.description
        node.created_at = time.time()
        node.meta = patch.meta
        node.docs = patch.docs


class MacroPatchParser(PatchParser[UnparsedMacroUpdate, ParsedMacroPatch]):
    def get_block(self, node: UnparsedMacroUpdate) -> TargetBlock:
        return TargetBlock.from_yaml_block(self.yaml, node)

@@ -11,7 +11,6 @@ from dbt.exceptions import DbtInternalError
from dbt.graph import Graph, GraphQueue, ResourceTypeSelector
from dbt.node_types import NodeType
from dbt.task.base import BaseRunner, resource_types_from_args
from dbt.task.test import TestSelector
from dbt_common.events.functions import fire_event

from .run import ModelRunner as run_model_runner
@@ -197,12 +196,6 @@ class BuildTask(RunTask):

        resource_types = self.resource_types(no_unit_tests)

        if resource_types == [NodeType.Test]:
            return TestSelector(
                graph=self.graph,
                manifest=self.manifest,
                previous_state=self.previous_state,
            )
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
File diff suppressed because one or more lines are too long
@@ -17,7 +17,6 @@ from dbt.graph import ResourceTypeSelector
from dbt.node_types import NodeType
from dbt.task.base import resource_types_from_args
from dbt.task.runnable import GraphRunnableTask
from dbt.task.test import TestSelector
from dbt_common.events.contextvars import task_contextvars
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.events.types import PrintEvent
@@ -197,23 +196,16 @@ class ListTask(GraphRunnableTask):
        else:
            return self.args.select

    def get_node_selector(self):
    def get_node_selector(self) -> ResourceTypeSelector:
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to perform node selection")
        if self.resource_types == [NodeType.Test]:
            return TestSelector(
                graph=self.graph,
                manifest=self.manifest,
                previous_state=self.previous_state,
            )
        else:
            return ResourceTypeSelector(
                graph=self.graph,
                manifest=self.manifest,
                previous_state=self.previous_state,
                resource_types=self.resource_types,
                include_empty_nodes=True,
            )
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=self.resource_types,
            include_empty_nodes=True,
        )

    def interpret_results(self, results):
        # list command should always return 0 as exit code

@@ -1,4 +1,5 @@
import functools
import os
import threading
import time
from datetime import datetime
@@ -25,18 +26,21 @@ from dbt.clients.jinja import MacroGenerator
from dbt.config.runtime import RuntimeConfig
from dbt.context.providers import generate_runtime_model_context
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import HookNode, ResultNode
from dbt.contracts.graph.nodes import HookNode, ModelNode, ResultNode
from dbt.events.types import (
    LogHookEndLine,
    LogHookStartLine,
    LogModelResult,
    LogStartLine,
    RunningOperationCaughtError,
)
from dbt.exceptions import CompilationError, DbtInternalError, DbtRuntimeError
from dbt.graph import ResourceTypeSelector
from dbt.hooks import get_hook_dict
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
from dbt.node_types import NodeType, RunHookType
from dbt.task.base import BaseRunner
from dbt_common.clients.jinja import MacroProtocol
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.events.base_types import EventLevel
from dbt_common.events.contextvars import log_contextvars
@@ -182,7 +186,21 @@ class ModelRunner(CompileRunner):

    def describe_node(self) -> str:
        # TODO CL 'language' will be moved to node level when we change representation
        return f"{self.node.language} {self.node.get_materialization()} model {self.get_node_representation()}"
        materialization_strategy = self.node.config.get("incremental_strategy")
        materialization = (
            "microbatch"
            if materialization_strategy == "microbatch"
            else self.node.get_materialization()
        )
        return f"{self.node.language} {materialization} model {self.get_node_representation()}"

    def describe_batch(self, batch_start: Optional[datetime]) -> str:
        # Only render the date portion of batch_start when the batch size is a day or coarser
        formatted_batch_start = MicrobatchBuilder.format_batch_start(
            batch_start, self.node.config.batch_size
        )

        return f"batch {formatted_batch_start} of {self.get_node_representation()}"

    def print_start_line(self):
        fire_event(
@@ -214,6 +232,51 @@ class ModelRunner(CompileRunner):
            level=level,
        )

    def print_batch_result_line(
        self,
        result: RunResult,
        batch_start: Optional[datetime],
        batch_idx: int,
        batch_total: int,
        exception: Optional[Exception],
    ):
        description = self.describe_batch(batch_start)
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        else:
            status = result.message
            level = EventLevel.INFO
        fire_event(
            LogModelResult(
                description=description,
                status=status,
                index=batch_idx,
                total=batch_total,
                execution_time=result.execution_time,
                node_info=self.node.node_info,
            ),
            level=level,
        )
        if exception:
            fire_event(RunningOperationCaughtError(exc=str(exception)))

    def print_batch_start_line(
        self, batch_start: Optional[datetime], batch_idx: int, batch_total: int
    ) -> None:
        if batch_start is None:
            return

        batch_description = self.describe_batch(batch_start)
        fire_event(
            LogStartLine(
                description=batch_description,
                index=batch_idx,
                total=batch_total,
                node_info=self.node.node_info,
            )
        )

    def before_execute(self) -> None:
        self.print_start_line()

@@ -239,6 +302,35 @@ class ModelRunner(CompileRunner):
            failures=result.get("failures"),
        )

    def _build_run_microbatch_model_result(
        self, model: ModelNode, batch_run_results: List[RunResult]
    ) -> RunResult:
        failures = sum([result.failures for result in batch_run_results if result.failures])
        return RunResult(
            node=model,
            # TODO: We should do something like RunStatus.PartialSuccess if there is a mixture of successes and failures
            status=RunStatus.Success if failures != len(batch_run_results) else RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            # TODO -- why isn't this getting propagated to logs?
            execution_time=0,
            message="SUCCESS" if failures != len(batch_run_results) else "ERROR",
            adapter_response={},
            failures=failures,
        )

    def _build_failed_run_batch_result(self, model: ModelNode) -> RunResult:
        return RunResult(
            node=model,
            status=RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=0,
            message="ERROR",
            adapter_response={},
            failures=1,
        )
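
The aggregation above only marks the model-level result as an error when every batch failed; one successful batch yields an overall SUCCESS with a non-zero failure count. A tiny sketch of the rule (values illustrative):

    batch_failures = [1, 0, 0]  # one failed batch out of three
    failures = sum(batch_failures)
    status = "SUCCESS" if failures != len(batch_failures) else "ERROR"  # -> "SUCCESS"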

    def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
        if isinstance(result, str):
            msg = (
@@ -256,6 +348,48 @@ class ModelRunner(CompileRunner):
            )
            raise CompilationError(msg, node=model)

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        try:
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        return self._build_run_model_result(model, context)

    def _execute_microbatch_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        manifest: Manifest,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        batch_results = None
        try:
            batch_results = self._execute_microbatch_materialization(
                model, manifest, context, materialization_macro
            )
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        if batch_results is not None:
            return self._build_run_microbatch_model_result(model, batch_results)
        else:
            return self._build_run_model_result(model, context)

    def execute(self, model, manifest):
        context = generate_runtime_model_context(model, self.config, manifest)

@@ -284,17 +418,97 @@ class ModelRunner(CompileRunner):
        )

        hook_ctx = self.adapter.pre_model_hook(context_config)
        try:
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))
        if (
            os.environ.get("DBT_EXPERIMENTAL_MICROBATCH")
            and model.config.materialized == "incremental"
            and model.config.incremental_strategy == "microbatch"
        ):
            return self._execute_microbatch_model(
                hook_ctx, context_config, model, manifest, context, materialization_macro
            )
        else:
            return self._execute_model(
                hook_ctx, context_config, model, context, materialization_macro
            )

        return self._build_run_model_result(model, context)
    def _execute_microbatch_materialization(
        self,
        model: ModelNode,
        manifest: Manifest,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> List[RunResult]:
        batch_results: List[RunResult] = []
        microbatch_builder = MicrobatchBuilder(
            model=model,
            is_incremental=self._is_incremental(model),
            event_time_start=getattr(self.config.args, "EVENT_TIME_START", None),
            event_time_end=getattr(self.config.args, "EVENT_TIME_END", None),
        )
        end = microbatch_builder.build_end_time()
        start = microbatch_builder.build_start_time(end)
        batches = microbatch_builder.build_batches(start, end)

        # Iterate over each batch, calling materialization_macro to get a batch-level run result
        for batch_idx, batch in enumerate(batches):
            self.print_batch_start_line(batch[0], batch_idx + 1, len(batches))

            exception = None
            try:
                # Set start/end in context prior to re-compiling
                model.config["__dbt_internal_microbatch_event_time_start"] = batch[0]
                model.config["__dbt_internal_microbatch_event_time_end"] = batch[1]

                # Recompile the node to re-resolve refs with event time filters rendered, and update the context
                self.compiler.compile_node(
                    model,
                    manifest,
                    {},
                    split_suffix=MicrobatchBuilder.format_batch_start(
                        batch[0], model.config.batch_size
                    ),
                )
                context["model"] = model
                context["sql"] = model.compiled_code
                context["compiled_code"] = model.compiled_code

                # Materialize the batch and cache any materialized relations
                result = MacroGenerator(
                    materialization_macro, context, stack=context["context_macro_stack"]
                )()
                for relation in self._materialization_relations(result, model):
                    self.adapter.cache_added(relation.incorporate(dbt_created=True))

                # Build the result for the executed batch
                batch_run_result = self._build_run_model_result(model, context)
                # Update context vars for future batches
                context["is_incremental"] = lambda: True
                context["should_full_refresh"] = lambda: False
            except Exception as e:
                exception = e
                batch_run_result = self._build_failed_run_batch_result(model)

            self.print_batch_result_line(
                batch_run_result, batch[0], batch_idx + 1, len(batches), exception
            )
            batch_results.append(batch_run_result)

        return batch_results
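
An illustrative sketch (an assumption about build_batches, which is defined elsewhere in MicrobatchBuilder): for batch_size='day', begin 2020-01-01, and an end time of 2020-01-03 12:00 UTC, the loop above would see roughly these (start, end) windows, one compile-and-materialize cycle per batch:

    # (2020-01-01 00:00, 2020-01-02 00:00)
    # (2020-01-02 00:00, 2020-01-03 00:00)
    # (2020-01-03 00:00, 2020-01-03 12:00)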

    def _is_incremental(self, model) -> bool:
        # TODO: Remove. This is a temporary method. We're working with adapters on
        # a strategy to ensure we can access the `is_incremental` logic without drift
        relation_info = self.adapter.Relation.create_from(self.config, model)
        relation = self.adapter.get_relation(
            relation_info.database, relation_info.schema, relation_info.name
        )
        return (
            relation is not None
            and relation.type == "table"
            and model.config.materialized == "incremental"
            and not (getattr(self.config.args, "FULL_REFRESH", False) or model.config.full_refresh)
        )


class RunTask(CompileTask):

@@ -2,10 +2,12 @@ import io
import threading
import time

from dbt.adapters.factory import get_adapter
from dbt.artifacts.schemas.run import RunResult, RunStatus
from dbt.context.providers import generate_runtime_model_context
from dbt.contracts.graph.nodes import SeedNode
from dbt.events.types import ShowNode
from dbt.task.base import ConfiguredTask
from dbt.task.compile import CompileRunner, CompileTask
from dbt.task.seed import SeedRunner
from dbt_common.events.base_types import EventLevel
@@ -117,3 +119,28 @@ class ShowTask(CompileTask):
            and (self.args.select or getattr(self.args, "inline", None))
        ):
            self.node_results.append(result)


class ShowTaskDirect(ConfiguredTask):
    def run(self):
        adapter = get_adapter(self.config)
        with adapter.connection_named("show", should_release_connection=False):
            response, table = adapter.execute(
                self.args.inline_direct, fetch=True, limit=self.args.limit
            )

            output = io.StringIO()
            if self.args.output == "json":
                table.to_json(path=output)
            else:
                table.print_table(output=output, max_rows=None)

            fire_event(
                ShowNode(
                    node_name="direct-query",
                    preview=output.getvalue(),
                    is_inline=True,
                    output_format=self.args.output,
                    unique_id="direct-query",
                )
            )
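
A hedged invocation sketch: the task reads self.args.inline_direct, self.args.output, and self.args.limit, which presumably surface on the CLI as flags along these lines (flag spellings inferred from the arg names, not confirmed by this diff):

    dbt show --inline-direct "select 1 as id" --output json --limit 5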

@@ -3,7 +3,17 @@ import json
import re
import threading
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
from typing import (
    TYPE_CHECKING,
    Any,
    Collection,
    Dict,
    List,
    Optional,
    Tuple,
    Type,
    Union,
)

import daff

@@ -25,9 +35,9 @@ from dbt.events.types import LogStartLine, LogTestResult
from dbt.exceptions import BooleanError, DbtInternalError
from dbt.flags import get_flags
from dbt.graph import ResourceTypeSelector
from dbt.node_types import NodeType
from dbt.node_types import TEST_NODE_TYPES, NodeType
from dbt.parser.unit_tests import UnitTestManifestLoader
from dbt.task.base import BaseRunner
from dbt.task.base import BaseRunner, resource_types_from_args
from dbt.utils import _coerce_decimal, strtobool
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.events.format import pluralize
@@ -127,7 +137,7 @@ class TestRunner(CompileRunner):
    def execute_data_test(self, data_test: TestNode, manifest: Manifest) -> TestResultData:
        context = generate_runtime_model_context(data_test, self.config, manifest)

        hook_ctx = self.adapter.pre_model_hook(context)
        hook_ctx = self.adapter.pre_model_hook(context["config"])

        materialization_macro = manifest.find_materialization_macro_by_name(
            self.config.project_name, data_test.get_materialization(), self.adapter.type()
@@ -205,7 +215,7 @@ class TestRunner(CompileRunner):
        # materialization, not compile the node.compiled_code
        context = generate_runtime_model_context(unit_test_node, self.config, unit_test_manifest)

        hook_ctx = self.adapter.pre_model_hook(context)
        hook_ctx = self.adapter.pre_model_hook(context["config"])

        materialization_macro = unit_test_manifest.find_materialization_macro_by_name(
            self.config.project_name, unit_test_node.get_materialization(), self.adapter.type()
@@ -375,16 +385,6 @@ class TestRunner(CompileRunner):
        return rendered


class TestSelector(ResourceTypeSelector):
    def __init__(self, graph, manifest, previous_state) -> None:
        super().__init__(
            graph=graph,
            manifest=manifest,
            previous_state=previous_state,
            resource_types=[NodeType.Test, NodeType.Unit],
        )


class TestTask(RunTask):
    """
    Testing:
@@ -397,13 +397,24 @@ class TestTask(RunTask):
    def raise_on_first_error(self) -> bool:
        return False

    def get_node_selector(self) -> TestSelector:
    @property
    def resource_types(self) -> List[NodeType]:
        resource_types: Collection[NodeType] = resource_types_from_args(
            self.args, set(TEST_NODE_TYPES), set(TEST_NODE_TYPES)
        )

        # filter out any non-test node types
        resource_types = [rt for rt in resource_types if rt in TEST_NODE_TYPES]
        return list(resource_types)

    def get_node_selector(self) -> ResourceTypeSelector:
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to perform node selection")
        return TestSelector(
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=self.resource_types,
        )
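
Deriving resource_types from the CLI args means test selection can now be narrowed from the command line; a hedged example, assuming the resource-type flags are wired through to dbt test as this change intends:

    dbt test --resource-type unit_test          # run only unit tests
    dbt test --exclude-resource-type unit_test  # run only data tests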

    def get_runner_type(self, _) -> Optional[Type[BaseRunner]]:

@@ -6,13 +6,16 @@ from contextvars import ContextVar, copy_context
from datetime import datetime
from io import StringIO
from typing import Any, Dict, List, Optional
from unittest import mock

import pytz
import yaml

from dbt.adapters.base.relation import BaseRelation
from dbt.adapters.factory import Adapter
from dbt.cli.main import dbtRunner
from dbt.contracts.graph.manifest import Manifest
from dbt.materializations.incremental.microbatch import MicrobatchBuilder
from dbt_common.context import _INVOCATION_CONTEXT_VAR, InvocationContext
from dbt_common.events.base_types import EventLevel
from dbt_common.events.functions import (
@@ -640,3 +643,8 @@ def safe_set_invocation_context():
    if invocation_var is None:
        invocation_var = _INVOCATION_CONTEXT_VAR
    invocation_var.set(InvocationContext(os.environ))


def patch_microbatch_end_time(dt_str: str):
    dt = datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=pytz.UTC)
    return mock.patch.object(MicrobatchBuilder, "build_end_time", return_value=dt)
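
Typical usage of this helper in a functional test, pinning the batch window's end time so runs are deterministic (this mirrors the microbatch tests added below):

    with patch_microbatch_end_time("2020-01-03 13:57:00"):
        run_dbt(["run"])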

@@ -12,6 +12,7 @@ from packaging.version import Version
from snowplow_tracker import Emitter, SelfDescribingJson, Subject, Tracker
from snowplow_tracker import __version__ as snowplow_version  # type: ignore
from snowplow_tracker import logger as sp_logger
from snowplow_tracker.events import StructuredEvent

from dbt import version as dbt_version
from dbt.adapters.exceptions import FailedToConnectError
@@ -25,7 +26,8 @@ from dbt.events.types import (
    SendingEvent,
    TrackingInitializeFailure,
)
from dbt_common.events.functions import fire_event, get_invocation_id
from dbt_common.events.base_types import EventMsg
from dbt_common.events.functions import fire_event, get_invocation_id, msg_to_dict
from dbt_common.exceptions import NotImplementedError

sp_logger.setLevel(100)
@@ -36,6 +38,7 @@ DBT_INVOCATION_ENV = "DBT_INVOCATION_ENV"

ADAPTER_INFO_SPEC = "iglu:com.dbt/adapter_info/jsonschema/1-0-1"
DEPRECATION_WARN_SPEC = "iglu:com.dbt/deprecation_warn/jsonschema/1-0-0"
BEHAVIOR_CHANGE_WARN_SPEC = "iglu:com.dbt/behavior_change_warn/jsonschema/1-0-0"
EXPERIMENTAL_PARSER = "iglu:com.dbt/experimental_parser/jsonschema/1-0-0"
INVOCATION_ENV_SPEC = "iglu:com.dbt/invocation_env/jsonschema/1-0-0"
INVOCATION_SPEC = "iglu:com.dbt/invocation/jsonschema/1-0-2"
@@ -215,12 +218,12 @@ def get_dbt_env_context():
def track(user, *args, **kwargs):
    if user.do_not_track:
        return
    else:
        fire_event(SendingEvent(kwargs=str(kwargs)))
        try:
            tracker.track_struct_event(*args, **kwargs)
        except Exception:
            fire_event(SendEventFailure())

    fire_event(SendingEvent(kwargs=str(kwargs)))
    try:
        tracker.track(StructuredEvent(*args, **kwargs))
    except Exception:
        fire_event(SendEventFailure())

def track_project_id(options):
@@ -364,6 +367,20 @@ def track_deprecation_warn(options):
    )


def track_behavior_change_warn(msg: EventMsg) -> None:
    if msg.info.name != "BehaviorChangeEvent" or active_user is None:
        return

    context = [SelfDescribingJson(BEHAVIOR_CHANGE_WARN_SPEC, msg_to_dict(msg))]
    track(
        active_user,
        category="dbt",
        action=msg.info.name,
        label=get_invocation_id(),
        context=context,
    )


def track_invocation_end(invocation_context, result_type=None):
    data = {"progress": "end", "result_type": result_type, "result": None}
    data.update(invocation_context)

@@ -69,10 +69,10 @@ setup(
        # These are major-version-0 packages also maintained by dbt-labs.
        # Accept patches but avoid automatically updating past a set minor version range.
        "dbt-extractor>=0.5.0,<=0.6",
        "dbt-semantic-interfaces>=0.7.0,<0.8",
        "dbt-semantic-interfaces>=0.7.1,<0.8",
        # Minor versions for these are expected to be backwards-compatible
        "dbt-common>=1.6.0,<2.0",
        "dbt-adapters>=1.3.0,<2.0",
        "dbt-common>=1.9.0,<2.0",
        "dbt-adapters>=1.7.0,<2.0",
        # ----
        # Expect compatibility with all new versions of these packages, so lower bounds only.
        "packaging>20.9",

@@ -259,6 +259,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -434,6 +440,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"delimiter": {
"type": "string",
"default": ","
@@ -909,6 +918,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -1083,6 +1098,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -1283,6 +1301,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -1457,6 +1481,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -2704,6 +2731,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -2878,6 +2911,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -3488,6 +3524,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -3663,6 +3705,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"access": {
"enum": [
"private",
@@ -4399,6 +4444,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -4573,6 +4624,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -4618,6 +4672,33 @@
"properties": {
"standard_granularity_column": {
"type": "string"
},
"custom_granularities": {
"type": "array",
"items": {
"type": "object",
"title": "CustomGranularity",
"properties": {
"name": {
"type": "string"
},
"column_name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false,
"required": [
"name"
]
}
}
},
"additionalProperties": false,
@@ -4800,6 +4881,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -4974,6 +5061,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -6282,6 +6372,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -6451,6 +6547,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"strategy": {
"anyOf": [
{
@@ -6511,6 +6610,57 @@
}
],
"default": null
},
"snapshot_meta_column_names": {
"type": "object",
"title": "SnapshotMetaColumnNames",
"properties": {
"dbt_valid_to": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_valid_from": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_scd_id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_updated_at": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false
}
},
"additionalProperties": true
@@ -7117,6 +7267,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -7291,6 +7447,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -7873,6 +8032,9 @@
"enabled": {
"type": "boolean",
"default": true
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -9874,6 +10036,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -10049,6 +10217,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"delimiter": {
"type": "string",
"default": ","
@@ -10524,6 +10695,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -10698,6 +10875,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -10898,6 +11078,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -11072,6 +11258,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -12319,6 +12508,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -12493,6 +12688,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -13103,6 +13301,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -13278,6 +13482,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"access": {
"enum": [
"private",
@@ -14014,6 +14221,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -14188,6 +14401,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -14233,6 +14449,33 @@
"properties": {
"standard_granularity_column": {
"type": "string"
},
"custom_granularities": {
"type": "array",
"items": {
"type": "object",
"title": "CustomGranularity",
"properties": {
"name": {
"type": "string"
},
"column_name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false,
"required": [
"name"
]
}
}
},
"additionalProperties": false,
@@ -14415,6 +14658,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -14589,6 +14838,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -15897,6 +16149,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -16066,6 +16324,9 @@
},
"additionalProperties": false
},
"event_time": {
"default": null
},
"strategy": {
"anyOf": [
{
@@ -16126,6 +16387,57 @@
}
],
"default": null
},
"snapshot_meta_column_names": {
"type": "object",
"title": "SnapshotMetaColumnNames",
"properties": {
"dbt_valid_to": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_valid_from": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_scd_id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_updated_at": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false
}
},
"additionalProperties": true
@@ -16732,6 +17044,12 @@
],
"default": null
},
"batch_size": {
"default": null
},
"lookback": {
"default": 0
},
"persist_docs": {
"type": "object",
"propertyNames": {
@@ -16906,6 +17224,9 @@
}
},
"additionalProperties": false
},
"event_time": {
"default": null
}
},
"additionalProperties": true
@@ -17479,6 +17800,9 @@
"enabled": {
"type": "boolean",
"default": true
},
"event_time": {
"default": null
}
},
"additionalProperties": true

@@ -39,6 +39,7 @@ for i in {1..10}; do
done;

createdb dbt
psql -c "SELECT version();"
psql -c "CREATE ROLE root WITH PASSWORD 'password';"
psql -c "ALTER ROLE root WITH LOGIN;"
psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;"

@@ -115,3 +115,7 @@ class BaseMergeExcludeColumns:
            update_sql_file=None,
        )
        self.check_scenario_correctness(expected_fields, test_case_fields, project)


class TestMergeExcludeColumns(BaseMergeExcludeColumns):
    pass

@@ -39,6 +39,10 @@ def get_rendered_model_config(**updates):
        "docs": {"node_color": None, "show": True},
        "contract": {"enforced": False, "alias_types": True},
        "access": "protected",
        "event_time": None,
        "lookback": 0,
        "batch_size": None,
        "begin": None,
    }
    result.update(updates)
    return result
@@ -74,6 +78,10 @@ def get_rendered_seed_config(**updates):
        "incremental_strategy": None,
        "docs": {"node_color": None, "show": True},
        "contract": {"enforced": False, "alias_types": True},
        "event_time": None,
        "lookback": 0,
        "batch_size": None,
        "begin": None,
    }
    result.update(updates)
    return result
@@ -97,6 +105,12 @@ def get_rendered_snapshot_config(**updates):
        "post-hook": [],
        "column_types": {},
        "quoting": {},
        "snapshot_meta_column_names": {
            "dbt_valid_to": None,
            "dbt_valid_from": None,
            "dbt_updated_at": None,
            "dbt_scd_id": None,
        },
        "tags": [],
        "persist_docs": {},
        "full_refresh": None,
@@ -114,6 +128,10 @@
        "incremental_strategy": None,
        "docs": {"node_color": None, "show": True},
        "contract": {"enforced": False, "alias_types": True},
        "event_time": None,
        "lookback": 0,
        "batch_size": None,
        "begin": None,
    }
    result.update(updates)
    return result
@@ -752,6 +770,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
            },
            "config": {
                "enabled": True,
                "event_time": None,
            },
            "quoting": {
                "database": None,
@@ -1254,6 +1273,7 @@ def expected_references_manifest(project):
            },
            "config": {
                "enabled": True,
                "event_time": None,
            },
            "quoting": {
                "database": False,

32
tests/functional/data_test_patch/fixtures.py
Normal file
@@ -0,0 +1,32 @@
tests__my_singular_test_sql = """
with my_cte as (
    select 1 as id, 'foo' as name
    union all
    select 2 as id, 'bar' as name
)
select * from my_cte
"""

tests__schema_yml = """
data_tests:
  - name: my_singular_test
    description: "{{ doc('my_singular_test_documentation') }}"
    config:
      error_if: ">10"
    meta:
      some_key: some_val
"""

tests__doc_block_md = """
{% docs my_singular_test_documentation %}

Some docs from a doc block

{% enddocs %}
"""

tests__invalid_name_schema_yml = """
data_tests:
  - name: my_double_test
    description: documentation, but make it double
"""
53
tests/functional/data_test_patch/test_singular_test_patch.py
Normal file
@@ -0,0 +1,53 @@
import os

import pytest

from dbt.tests.util import get_artifact, run_dbt, run_dbt_and_capture
from tests.functional.data_test_patch.fixtures import (
    tests__doc_block_md,
    tests__invalid_name_schema_yml,
    tests__my_singular_test_sql,
    tests__schema_yml,
)


class TestPatchSingularTest:
    @pytest.fixture(scope="class")
    def tests(self):
        return {
            "my_singular_test.sql": tests__my_singular_test_sql,
            "schema.yml": tests__schema_yml,
            "doc_block.md": tests__doc_block_md,
        }

    def test_compile(self, project):
        run_dbt(["compile"])
        manifest = get_artifact(project.project_root, "target", "manifest.json")
        assert len(manifest["nodes"]) == 1

        my_singular_test_node = manifest["nodes"]["test.test.my_singular_test"]
        assert my_singular_test_node["description"] == "Some docs from a doc block"
        assert my_singular_test_node["config"]["error_if"] == ">10"
        assert my_singular_test_node["config"]["meta"] == {"some_key": "some_val"}


class TestPatchSingularTestInvalidName:
    @pytest.fixture(scope="class")
    def tests(self):
        return {
            "my_singular_test.sql": tests__my_singular_test_sql,
            "schema_with_invalid_name.yml": tests__invalid_name_schema_yml,
        }

    def test_compile(self, project):
        _, log_output = run_dbt_and_capture(["compile"])

        file_path = (
            "tests\\schema_with_invalid_name.yml"
            if os.name == "nt"
            else "tests/schema_with_invalid_name.yml"
        )
        assert (
            f"Did not find matching node for patch with name 'my_double_test' in the 'data_tests' section of file '{file_path}'"
            in log_output
        )
@@ -63,6 +63,12 @@ class TestList:
                "persist_docs": {},
                "target_database": happy_path_project.database,
                "target_schema": happy_path_project.test_schema,
                "snapshot_meta_column_names": {
                    "dbt_scd_id": None,
                    "dbt_updated_at": None,
                    "dbt_valid_from": None,
                    "dbt_valid_to": None,
                },
                "unique_key": "id",
                "strategy": "timestamp",
                "updated_at": "updated_at",
@@ -79,6 +85,10 @@ class TestList:
                "incremental_strategy": None,
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "unique_id": "snapshot.test.my_snapshot",
            "original_file_path": normalize("snapshots/snapshot.sql"),
@@ -121,6 +131,10 @@ class TestList:
                "incremental_strategy": None,
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "unique_id": "analysis.test.a",
            "original_file_path": normalize("analyses/a.sql"),
@@ -182,6 +196,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/ephemeral.sql"),
            "unique_id": "model.test.ephemeral",
@@ -220,6 +238,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/incremental.sql"),
            "unique_id": "model.test.incremental",
@@ -258,6 +280,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/sub/inner.sql"),
            "unique_id": "model.test.inner",
@@ -296,6 +322,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/metricflow_time_spine.sql"),
            "unique_id": "model.test.metricflow_time_spine",
@@ -334,6 +364,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/metricflow_time_spine_second.sql"),
            "unique_id": "model.test.metricflow_time_spine_second",
@@ -372,6 +406,10 @@ class TestList:
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "access": "protected",
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "original_file_path": normalize("models/outer.sql"),
            "unique_id": "model.test.outer",
@@ -440,6 +478,7 @@ class TestList:
            "json": {
                "config": {
                    "enabled": True,
                    "event_time": None,
                },
                "unique_id": "source.test.my_source.my_table",
                "original_file_path": normalize("models/schema.yml"),
@@ -490,6 +529,10 @@ class TestList:
                "incremental_strategy": None,
                "docs": {"node_color": None, "show": True},
                "contract": {"enforced": False, "alias_types": True},
                "event_time": None,
                "lookback": 0,
                "batch_size": None,
                "begin": None,
            },
            "depends_on": {"macros": []},
            "unique_id": "seed.test.seed",

524
tests/functional/microbatch/test_microbatch.py
Normal file
@@ -0,0 +1,524 @@
import os
from unittest import mock

import pytest

from dbt.tests.util import (
    patch_microbatch_end_time,
    read_file,
    relation_from_name,
    run_dbt,
    run_dbt_and_capture,
    write_file,
)

input_model_sql = """
{{ config(materialized='table', event_time='event_time') }}

select 1 as id, TIMESTAMP '2020-01-01 00:00:00-0' as event_time
union all
select 2 as id, TIMESTAMP '2020-01-02 00:00:00-0' as event_time
union all
select 3 as id, TIMESTAMP '2020-01-03 00:00:00-0' as event_time
"""

input_model_without_event_time_sql = """
{{ config(materialized='table') }}

select 1 as id, TIMESTAMP '2020-01-01 00:00:00-0' as event_time
union all
select 2 as id, TIMESTAMP '2020-01-02 00:00:00-0' as event_time
union all
select 3 as id, TIMESTAMP '2020-01-03 00:00:00-0' as event_time
"""

microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
select * from {{ ref('input_model') }}
"""
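# The config above bundles the settings microbatch needs: event_time (the filter
# column), batch_size (the batch grain), and begin (the start of the first batch).
# The config-validation tests further down treat all three as required.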
microbatch_model_ref_render_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
select * from {{ ref('input_model').render() }}
"""

seed_csv = """id,event_time
1,'2020-01-01 00:00:00-0'
2,'2020-01-02 00:00:00-0'
3,'2020-01-03 00:00:00-0'
"""

seeds_yaml = """
seeds:
  - name: raw_source
    config:
      column_types:
        event_time: TIMESTAMP
"""

sources_yaml = """
sources:
  - name: seed_sources
    schema: "{{ target.schema }}"
    tables:
      - name: raw_source
        config:
          event_time: event_time
"""
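# Setting event_time on the source table config (above) is what lets a microbatch
# model filter a source input per batch, mirroring event_time on model configs.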
microbatch_model_calling_source_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
select * from {{ source('seed_sources', 'raw_source') }}
"""

custom_microbatch_strategy = """
{% macro get_incremental_microbatch_sql(arg_dict) %}
    {% do log('custom microbatch strategy', info=True) %}

    {%- set dest_cols_csv = get_quoted_csv(arg_dict["dest_columns"] | map(attribute="name")) -%}

    insert into {{ arg_dict["target_relation"] }} ({{ dest_cols_csv }})
    (
        select {{ dest_cols_csv }}
        from {{ arg_dict["temp_relation"] }}
    )

{% endmacro %}
"""
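# Assumption, not stated in the diff: a project-level get_incremental_microbatch_sql
# macro shadows the builtin via dbt's macro dispatch; the log line above is the
# observable marker that the custom-strategy tests below assert on.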
class BaseMicrobatchCustomUserStrategy:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_sql,
            "microbatch_model.sql": microbatch_model_sql,
        }

    @pytest.fixture(scope="class")
    def macros(self):
        return {"microbatch.sql": custom_microbatch_strategy}


class TestMicrobatchCustomUserStrategyDefault(BaseMicrobatchCustomUserStrategy):
    def test_use_custom_microbatch_strategy_by_default(self, project):
        with mock.patch.object(
            type(project.adapter), "valid_incremental_strategies", lambda _: []
        ):
            # Initial run
            run_dbt(["run"])

            # Incremental run uses custom strategy
            _, logs = run_dbt_and_capture(["run"])
            assert "custom microbatch strategy" in logs


class TestMicrobatchCustomUserStrategyEnvVarTrueValid(BaseMicrobatchCustomUserStrategy):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_use_custom_microbatch_strategy_env_var_true_valid_incremental_strategy(
        self, project
    ):
        with mock.patch.object(
            type(project.adapter), "valid_incremental_strategies", lambda _: ["microbatch"]
        ):
            # Initial run
            with patch_microbatch_end_time("2020-01-03 13:57:00"):
                run_dbt(["run"])

            # Incremental run uses custom strategy
            with patch_microbatch_end_time("2020-01-03 13:57:00"):
                _, logs = run_dbt_and_capture(["run"])
            assert "custom microbatch strategy" in logs


# TODO: Consider a behaviour flag here if DBT_EXPERIMENTAL_MICROBATCH is removed
# Since this causes an exception prior to using an override
class TestMicrobatchCustomUserStrategyEnvVarTrueInvalid(BaseMicrobatchCustomUserStrategy):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_use_custom_microbatch_strategy_env_var_true_invalid_incremental_strategy(
        self, project
    ):
        with mock.patch.object(
            type(project.adapter), "valid_incremental_strategies", lambda _: []
        ):
            # Initial run
            with patch_microbatch_end_time("2020-01-03 13:57:00"):
                run_dbt(["run"])

            # Incremental run fails
            with patch_microbatch_end_time("2020-01-03 13:57:00"):
                _, logs = run_dbt_and_capture(["run"], expect_pass=False)
            assert "'microbatch' is not valid" in logs


class BaseMicrobatchTest:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_sql,
            "microbatch_model.sql": microbatch_model_sql,
        }

    def assert_row_count(self, project, relation_name: str, expected_row_count: int):
        relation = relation_from_name(project.adapter, relation_name)
        result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")

        if result[0] != expected_row_count:
            # running show for debugging
            run_dbt(["show", "--inline", f"select * from {relation}"])

        assert result[0] == expected_row_count


class TestMicrobatchCLI(BaseMicrobatchTest):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # run without --event-time-start or --event-time-end - 3 expected rows in output
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # build model >= 2020-01-02
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run", "--event-time-start", "2020-01-02", "--full-refresh"])
        self.assert_row_count(project, "microbatch_model", 2)

        # build model < 2020-01-03
        run_dbt(["run", "--event-time-end", "2020-01-03", "--full-refresh"])
        self.assert_row_count(project, "microbatch_model", 2)

        # build model between 2020-01-02 >= event_time < 2020-01-03
        run_dbt(
            [
                "run",
                "--event-time-start",
                "2020-01-02",
                "--event-time-end",
                "2020-01-03",
                "--full-refresh",
            ]
        )
        self.assert_row_count(project, "microbatch_model", 1)


class TestMicroBatchBoundsDefault(BaseMicrobatchTest):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # initial run -- backfills all data
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # our partition grain is "day" so running the same day without new data should produce the same results
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # add next two days of data
        test_schema_relation = project.adapter.Relation.create(
            database=project.database, schema=project.test_schema
        )
        project.run_sql(
            f"insert into {test_schema_relation}.input_model(id, event_time) values (4, TIMESTAMP '2020-01-04 00:00:00-0'), (5, TIMESTAMP '2020-01-05 00:00:00-0')"
        )
        self.assert_row_count(project, "input_model", 5)

        # re-run without changing current time => no insert
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 3)

        # re-run by advancing time by one day changing current time => insert 1 row
        with patch_microbatch_end_time("2020-01-04 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 4)

        # re-run by advancing time by one more day changing current time => insert 1 more row
        with patch_microbatch_end_time("2020-01-05 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 5)


class TestMicrobatchWithSource(BaseMicrobatchTest):
    @pytest.fixture(scope="class")
    def seeds(self):
        return {
            "raw_source.csv": seed_csv,
        }

    @pytest.fixture(scope="class")
    def models(self):
        return {
            "microbatch_model.sql": microbatch_model_calling_source_sql,
            "sources.yml": sources_yaml,
            "seeds.yml": seeds_yaml,
        }

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # ensure seed is created for source
        run_dbt(["seed"])

        # initial run -- backfills all data
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # our partition grain is "day" so running the same day without new data should produce the same results
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # add next two days of data
        test_schema_relation = project.adapter.Relation.create(
            database=project.database, schema=project.test_schema
        )
        project.run_sql(
            f"insert into {test_schema_relation}.raw_source(id, event_time) values (4, TIMESTAMP '2020-01-04 00:00:00-0'), (5, TIMESTAMP '2020-01-05 00:00:00-0')"
        )
        self.assert_row_count(project, "raw_source", 5)

        # re-run without changing current time => no insert
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 3)

        # re-run by advancing time by one day changing current time => insert 1 row
        with patch_microbatch_end_time("2020-01-04 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 4)

        # re-run by advancing time by one more day changing current time => insert 1 more row
        with patch_microbatch_end_time("2020-01-05 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 5)


class TestMicrobatchWithInputWithoutEventTime(BaseMicrobatchTest):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_without_event_time_sql,
            "microbatch_model.sql": microbatch_model_sql,
        }

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # initial run -- backfills all data
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # our partition grain is "day" so running the same day without new data should produce the same results
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # add next two days of data
        test_schema_relation = project.adapter.Relation.create(
            database=project.database, schema=project.test_schema
        )
        project.run_sql(
            f"insert into {test_schema_relation}.input_model(id, event_time) values (4, TIMESTAMP '2020-01-04 00:00:00-0'), (5, TIMESTAMP '2020-01-05 00:00:00-0')"
        )
        self.assert_row_count(project, "input_model", 5)

        # re-run without changing current time => INSERT BECAUSE INPUT MODEL ISN'T BEING FILTERED
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 5)


class TestMicrobatchUsingRefRenderSkipsFilter(BaseMicrobatchTest):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # initial run -- backfills all data
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # our partition grain is "day" so running the same day without new data should produce the same results
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run"])
        self.assert_row_count(project, "microbatch_model", 3)

        # add next two days of data
        test_schema_relation = project.adapter.Relation.create(
            database=project.database, schema=project.test_schema
        )
        project.run_sql(
            f"insert into {test_schema_relation}.input_model(id, event_time) values (4, TIMESTAMP '2020-01-04 00:00:00-0'), (5, TIMESTAMP '2020-01-05 00:00:00-0')"
        )
        self.assert_row_count(project, "input_model", 5)

        # re-run without changing current time => no insert
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 3)

        # Update microbatch model to call .render() on ref('input_model')
        write_file(
            microbatch_model_ref_render_sql, project.project_root, "models", "microbatch_model.sql"
        )

        # re-run without changing current time => INSERT because .render() skips filtering
        with patch_microbatch_end_time("2020-01-03 14:57:00"):
            run_dbt(["run", "--select", "microbatch_model"])
        self.assert_row_count(project, "microbatch_model", 5)


microbatch_model_context_vars = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
{{ log("start: "~ model.config.__dbt_internal_microbatch_event_time_start, info=True)}}
{{ log("end: "~ model.config.__dbt_internal_microbatch_event_time_end, info=True)}}
select * from {{ ref('input_model') }}
"""
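# model.config.__dbt_internal_microbatch_event_time_start/_end appear to be
# runner-injected, per-batch bounds; the log assertions below rely on one
# start/end pair being emitted per daily batch.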
class TestMicrobatchJinjaContextVarsAvailable(BaseMicrobatchTest):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_sql,
            "microbatch_model.sql": microbatch_model_context_vars,
        }

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time_logs(self, project):
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            _, logs = run_dbt_and_capture(["run", "--event-time-start", "2020-01-01"])

        assert "start: 2020-01-01 00:00:00+00:00" in logs
        assert "end: 2020-01-02 00:00:00+00:00" in logs

        assert "start: 2020-01-02 00:00:00+00:00" in logs
        assert "end: 2020-01-03 00:00:00+00:00" in logs

        assert "start: 2020-01-03 00:00:00+00:00" in logs
        assert "end: 2020-01-03 13:57:00+00:00" in logs


microbatch_model_failing_incremental_partition_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
{% if '2020-01-02' in (model.config.__dbt_internal_microbatch_event_time_start | string) %}
invalid_sql
{% endif %}
select * from {{ ref('input_model') }}
"""


class TestMicrobatchIncrementalPartitionFailure(BaseMicrobatchTest):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_sql,
            "microbatch_model.sql": microbatch_model_failing_incremental_partition_sql,
        }

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # run all partitions from start - 2 expected rows in output, one failed
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run", "--event-time-start", "2020-01-01"])
        self.assert_row_count(project, "microbatch_model", 2)


microbatch_model_first_partition_failing_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
{% if '2020-01-01' in (model.config.__dbt_internal_microbatch_event_time_start | string) %}
invalid_sql
{% endif %}
select * from {{ ref('input_model') }}
"""


class TestMicrobatchInitialPartitionFailure(BaseMicrobatchTest):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": input_model_sql,
            "microbatch_model.sql": microbatch_model_first_partition_failing_sql,
        }

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # run all partitions from start - 2 expected rows in output, one failed
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run", "--event-time-start", "2020-01-01"])
        self.assert_row_count(project, "microbatch_model", 2)


class TestMicrobatchCompiledRunPaths(BaseMicrobatchTest):
    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_run_with_event_time(self, project):
        # run all partitions from start
        with patch_microbatch_end_time("2020-01-03 13:57:00"):
            run_dbt(["run", "--event-time-start", "2020-01-01"])

        # Compiled paths - compiled model without filter only
        assert read_file(
            project.project_root,
            "target",
            "compiled",
            "test",
            "models",
            "microbatch_model.sql",
        )

        # Compiled paths - batch compilations
        assert read_file(
            project.project_root,
            "target",
            "compiled",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-01.sql",
        )
        assert read_file(
            project.project_root,
            "target",
            "compiled",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-02.sql",
        )
        assert read_file(
            project.project_root,
            "target",
            "compiled",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-03.sql",
        )

        assert read_file(
            project.project_root,
            "target",
            "run",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-01.sql",
        )
        assert read_file(
            project.project_root,
            "target",
            "run",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-02.sql",
        )
        assert read_file(
            project.project_root,
            "target",
            "run",
            "test",
            "models",
            "microbatch_model",
            "microbatch_model_2020-01-03.sql",
        )
185 tests/functional/microbatch/test_microbatch_config_validation.py (Normal file)
@@ -0,0 +1,185 @@
import os
from unittest import mock

import pytest

from dbt.exceptions import ParsingError
from dbt.tests.util import run_dbt

valid_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='day', event_time='event_time') }}
select * from {{ ref('input_model') }}
"""

valid_microbatch_model_no_config_sql = """
select * from {{ ref('input_model') }}
"""

valid_microbatch_model_config_yml = """
models:
  - name: microbatch
    config:
      materialized: incremental
      incremental_strategy: microbatch
      batch_size: day
      event_time: event_time
      begin: 2020-01-01
"""

invalid_microbatch_model_config_yml = """
models:
  - name: microbatch
    config:
      materialized: incremental
      incremental_strategy: microbatch
      batch_size: day
      event_time: event_time
      begin: 2020-01-01 11 PM
"""
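# '2020-01-01 11 PM' above is deliberately not a parseable ISO-8601 timestamp,
# so a begin configured this way should fail at parse time (see the begin-format
# test below).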
missing_event_time_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='day') }}
select * from {{ ref('input_model') }}
"""

invalid_event_time_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='day', event_time=2) }}
select * from {{ ref('input_model') }}
"""

missing_begin_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='day', event_time='event_time') }}
select * from {{ ref('input_model') }}
"""

invalid_begin_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='day', event_time='event_time', begin=2) }}
select * from {{ ref('input_model') }}
"""


missing_batch_size_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', event_time='event_time') }}
select * from {{ ref('input_model') }}
"""

invalid_batch_size_microbatch_model_sql = """
{{ config(materialized='incremental', incremental_strategy='microbatch', batch_size='invalid', event_time='event_time') }}
select * from {{ ref('input_model') }}
"""

invalid_event_time_input_model_sql = """
{{ config(materialized='table', event_time=1) }}

select 1 as id, TIMESTAMP '2020-01-01 00:00:00-0' as event_time
"""

valid_input_model_sql = """
{{ config(materialized='table') }}

select 1 as id, TIMESTAMP '2020-01-01 00:00:00-0' as event_time
"""


class BaseMicrobatchTestParseError:
    @pytest.fixture(scope="class")
    def models(self):
        return {}

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_parsing_error_raised(self, project):
        with pytest.raises(ParsingError):
            run_dbt(["parse"])


class BaseMicrobatchTestNoError:
    @pytest.fixture(scope="class")
    def models(self):
        return {}

    @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})
    def test_parsing_error_not_raised(self, project):
        run_dbt(["parse"])


class TestMissingEventTimeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": missing_event_time_microbatch_model_sql,
        }


class TestInvalidEventTimeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": invalid_event_time_microbatch_model_sql,
        }


class TestMissingBeginMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": missing_begin_microbatch_model_sql,
        }
class TestInvalidBeginTypeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": invalid_begin_microbatch_model_sql,
        }

class TestInvalidBeginFormatMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": valid_microbatch_model_no_config_sql,
            "microbatch.yml": invalid_microbatch_model_config_yml,
        }


class TestMissingBatchSizeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": missing_batch_size_microbatch_model_sql,
        }


class TestInvalidBatchSizeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": invalid_batch_size_microbatch_model_sql,
        }


class TestInvalidInputEventTimeMicrobatch(BaseMicrobatchTestParseError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": invalid_event_time_input_model_sql,
            "microbatch.sql": valid_microbatch_model_sql,
        }


class TestValidBeginMicrobatch(BaseMicrobatchTestNoError):
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "input_model.sql": valid_input_model_sql,
            "microbatch.sql": valid_microbatch_model_no_config_sql,
            "schema.yml": valid_microbatch_model_config_yml,
        }
@@ -132,6 +132,33 @@ class TestShowInlineFailDB(ShowBase):
            run_dbt(["show", "--inline", "slect asdlkjfsld;j"])


class TestShowInlineDirect(ShowBase):

    def test_inline_direct_pass(self, project):
        query = f"select * from {project.test_schema}.sample_seed"
        (_, log_output) = run_dbt_and_capture(["show", "--inline-direct", query])
        assert "Previewing inline node" in log_output
        assert "sample_num" in log_output
        assert "sample_bool" in log_output

        # This is a bit of a hack. Unfortunately, the test teardown code
        # expects that dbt loaded an adapter with a macro context the last
        # time it was called. The '--inline-direct' parameter used on the
        # previous run explicitly disables macros. So now we call 'dbt seed',
        # which will load the adapter fully and satisfy the teardown code.
        run_dbt(["seed"])


class TestShowInlineDirectFail(ShowBase):

    def test_inline_fail_database_error(self, project):
        with pytest.raises(DbtRuntimeError, match="Database Error"):
            run_dbt(["show", "--inline-direct", "slect asdlkjfsld;j"])

        # See prior test for explanation of why this is here
        run_dbt(["seed"])


class TestShowEphemeral(ShowBase):
    def test_ephemeral_model(self, project):
        run_dbt(["build"])
82 tests/functional/snapshots/data/seed_cn.sql (Normal file)
@@ -0,0 +1,82 @@
create table {database}.{schema}.seed (
    id INTEGER,
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    email VARCHAR(50),
    gender VARCHAR(50),
    ip_address VARCHAR(20),
    updated_at TIMESTAMP WITHOUT TIME ZONE
);

create table {database}.{schema}.snapshot_expected (
    id INTEGER,
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    email VARCHAR(50),
    gender VARCHAR(50),
    ip_address VARCHAR(20),

    -- snapshotting fields
    updated_at TIMESTAMP WITHOUT TIME ZONE,
    test_valid_from TIMESTAMP WITHOUT TIME ZONE,
    test_valid_to TIMESTAMP WITHOUT TIME ZONE,
    test_scd_id TEXT,
    test_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- seed inserts
-- use the same email for two users to verify that duplicated check_cols values
-- are handled appropriately
insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values
(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19');


-- populate snapshot table
insert into {database}.{schema}.snapshot_expected (
    id,
    first_name,
    last_name,
    email,
    gender,
    ip_address,
    updated_at,
    test_valid_from,
    test_valid_to,
    test_updated_at,
    test_scd_id
)

select
    id,
    first_name,
    last_name,
    email,
    gender,
    ip_address,
    updated_at,
    -- fields added by snapshotting
    updated_at as test_valid_from,
    null::timestamp as test_valid_to,
    updated_at as test_updated_at,
    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed;
@@ -291,6 +291,19 @@ snapshots_pg__snapshot_sql = """
{% endsnapshot %}
"""

snapshots_pg__snapshot_yml = """
version: 2
snapshots:
  - name: snapshot_actual
    relation: "ref('seed')"
    config:
      unique_key: "id || '-' || first_name"
      strategy: timestamp
      updated_at: updated_at
    meta:
      owner: 'a_owner'
"""

snapshots_pg__snapshot_no_target_schema_sql = """
{% snapshot snapshot_actual %}


@@ -20,6 +20,7 @@ from tests.functional.snapshots.fixtures import (
    seeds__seed_newcol_csv,
    snapshots_pg__snapshot_no_target_schema_sql,
    snapshots_pg__snapshot_sql,
    snapshots_pg__snapshot_yml,
    snapshots_pg_custom__snapshot_sql,
    snapshots_pg_custom_namespaced__snapshot_sql,
)
@@ -372,3 +373,24 @@ class TestBasicUpdatedAtCheckCols(UpdatedAtCheckCols):
class TestRefUpdatedAtCheckCols(UpdatedAtCheckCols):
    def test_updated_at_ref(self, project):
        ref_setup(project, num_snapshot_models=2)


class BasicYaml(Basic):
    @pytest.fixture(scope="class")
    def snapshots(self):
        """Overrides the same function in Basic to use the YAML method of
        defining a snapshot."""
        return {"snapshot.yml": snapshots_pg__snapshot_yml}

    @pytest.fixture(scope="class")
    def models(self):
        """Overrides the same function in Basic to use a modified version of
        schema.yml without snapshot config."""
        return {
            "ref_snapshot.sql": models__ref_snapshot_sql,
        }


class TestBasicSnapshotYaml(BasicYaml):
    def test_basic_snapshot_yaml(self, project):
        snapshot_setup(project, num_snapshot_models=1)
234 tests/functional/snapshots/test_snapshot_column_names.py (Normal file)
@@ -0,0 +1,234 @@
import os

import pytest

from dbt.tests.util import (
    check_relations_equal,
    get_manifest,
    run_dbt,
    run_dbt_and_capture,
    update_config_file,
)

snapshot_actual_sql = """
{% snapshot snapshot_actual %}

    {{
        config(
            unique_key='id || ' ~ "'-'" ~ ' || first_name',
        )
    }}

    select * from {{target.database}}.{{target.schema}}.seed

{% endsnapshot %}
"""

snapshots_yml = """
snapshots:
  - name: snapshot_actual
    config:
      strategy: timestamp
      updated_at: updated_at
      snapshot_meta_column_names:
        dbt_valid_to: test_valid_to
        dbt_valid_from: test_valid_from
        dbt_scd_id: test_scd_id
        dbt_updated_at: test_updated_at
"""
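# snapshot_meta_column_names above remaps dbt's snapshot metadata columns
# (dbt_valid_from/_to, dbt_scd_id, dbt_updated_at) onto the test_* names that
# snapshot_expected in seed_cn.sql is built with, so check_relations_equal can
# compare the two relations directly.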
snapshots_no_column_names_yml = """
snapshots:
  - name: snapshot_actual
    config:
      strategy: timestamp
      updated_at: updated_at
"""

ref_snapshot_sql = """
select * from {{ ref('snapshot_actual') }}
"""
invalidate_sql = """
|
||||
-- update records 11 - 21. Change email and updated_at field
|
||||
update {schema}.seed set
|
||||
updated_at = updated_at + interval '1 hour',
|
||||
email = case when id = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end
|
||||
where id >= 10 and id <= 20;
|
||||
|
||||
|
||||
-- invalidate records 11 - 21
|
||||
update {schema}.snapshot_expected set
|
||||
test_valid_to = updated_at + interval '1 hour'
|
||||
where id >= 10 and id <= 20;
|
||||
|
||||
"""
|
||||
update_sql = """
-- insert v2 of the 10 - 20 records

insert into {database}.{schema}.snapshot_expected (
    id,
    first_name,
    last_name,
    email,
    gender,
    ip_address,
    updated_at,
    test_valid_from,
    test_valid_to,
    test_updated_at,
    test_scd_id
)

select
    id,
    first_name,
    last_name,
    email,
    gender,
    ip_address,
    updated_at,
    -- fields added by snapshotting
    updated_at as test_valid_from,
    null::timestamp as test_valid_to,
    updated_at as test_updated_at,
    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed
where id >= 10 and id <= 20;
"""
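# The md5(id || '-' || first_name || '|' || updated_at) expression above appears
# to mirror how the timestamp strategy derives its scd_id, keeping the expected
# rows aligned with what dbt snapshot itself writes.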
class TestSnapshotColumnNames:
    @pytest.fixture(scope="class")
    def snapshots(self):
        return {"snapshot.sql": snapshot_actual_sql}

    @pytest.fixture(scope="class")
    def models(self):
        return {
            "snapshots.yml": snapshots_yml,
            "ref_snapshot.sql": ref_snapshot_sql,
        }

    def test_snapshot_column_names(self, project):
        path = os.path.join(project.test_data_dir, "seed_cn.sql")
        project.run_sql_file(path)
        results = run_dbt(["snapshot"])
        assert len(results) == 1

        project.run_sql(invalidate_sql)
        project.run_sql(update_sql)

        results = run_dbt(["snapshot"])
        assert len(results) == 1

        # run_dbt(["test"])
        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])


class TestSnapshotColumnNamesFromDbtProject:
    @pytest.fixture(scope="class")
    def snapshots(self):
        return {"snapshot.sql": snapshot_actual_sql}

    @pytest.fixture(scope="class")
    def models(self):
        return {
            "snapshots.yml": snapshots_no_column_names_yml,
            "ref_snapshot.sql": ref_snapshot_sql,
        }

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "snapshots": {
                "test": {
                    "+snapshot_meta_column_names": {
                        "dbt_valid_to": "test_valid_to",
                        "dbt_valid_from": "test_valid_from",
                        "dbt_scd_id": "test_scd_id",
                        "dbt_updated_at": "test_updated_at",
                    }
                }
            }
        }

    def test_snapshot_column_names_from_project(self, project):
        path = os.path.join(project.test_data_dir, "seed_cn.sql")
        project.run_sql_file(path)
        results = run_dbt(["snapshot"])
        assert len(results) == 1

        project.run_sql(invalidate_sql)
        project.run_sql(update_sql)

        results = run_dbt(["snapshot"])
        assert len(results) == 1

        # run_dbt(["test"])
        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])


class TestSnapshotInvalidColumnNames:
    @pytest.fixture(scope="class")
    def snapshots(self):
        return {"snapshot.sql": snapshot_actual_sql}

    @pytest.fixture(scope="class")
    def models(self):
        return {
            "snapshots.yml": snapshots_no_column_names_yml,
            "ref_snapshot.sql": ref_snapshot_sql,
        }

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "snapshots": {
                "test": {
                    "+snapshot_meta_column_names": {
                        "dbt_valid_to": "test_valid_to",
                        "dbt_valid_from": "test_valid_from",
                        "dbt_scd_id": "test_scd_id",
                        "dbt_updated_at": "test_updated_at",
                    }
                }
            }
        }

    def test_snapshot_invalid_column_names(self, project):
        path = os.path.join(project.test_data_dir, "seed_cn.sql")
        project.run_sql_file(path)
        results = run_dbt(["snapshot"])
        assert len(results) == 1
        manifest = get_manifest(project.project_root)
        snapshot_node = manifest.nodes["snapshot.test.snapshot_actual"]
        assert snapshot_node.config.snapshot_meta_column_names == {
"dbt_valid_to": "test_valid_to",
|
||||
"dbt_valid_from": "test_valid_from",
|
||||
"dbt_scd_id": "test_scd_id",
|
||||
"dbt_updated_at": "test_updated_at",
|
||||
}
|
||||
|
||||
project.run_sql(invalidate_sql)
|
||||
project.run_sql(update_sql)
|
||||
|
||||
# Change snapshot_meta_columns and look for an error
|
||||
different_columns = {
|
||||
"snapshots": {
|
||||
"test": {
|
||||
"+snapshot_meta_column_names": {
|
||||
"dbt_valid_to": "test_valid_to",
|
||||
"dbt_updated_at": "test_updated_at",
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
update_config_file(different_columns, "dbt_project.yml")
|
||||
|
||||
results, log_output = run_dbt_and_capture(["snapshot"], expect_pass=False)
|
||||
assert len(results) == 1
|
||||
assert "Compilation Error in snapshot snapshot_actual" in log_output
|
||||
assert "Snapshot target is missing configured columns" in log_output
|
||||
@@ -27,6 +27,14 @@ select *
from {{ source('seed_sources', 'raw_source') }}
"""

model_no_ephemeral_ref_sql = """
select *
from {{ ref('model_input') }}
union all
select *
from {{ source('seed_sources', 'raw_source') }}
"""


schema_sources_yml = """
sources:
@@ -36,6 +44,29 @@ sources:
      - name: raw_source
"""

unit_tests_yml = """
unit_tests:
  - name: test_my_model
    model: model_no_ephemeral_ref
    given:
      - input: ref('model_input')
        format: csv
        rows: |
          id
          1
      - input: source('seed_sources', 'raw_source')
        format: csv
        rows: |
          id
          2
    expect:
      format: csv
      rows: |
        id
        1
        2
"""
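# model_no_ephemeral_ref above references only a table model and a source,
# seemingly so that both unit-test inputs can be mocked directly with the given:
# entries; the original model.sql goes through an ephemeral input instead.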
class TestEmptyFlag:
    @pytest.fixture(scope="class")
@@ -50,7 +81,9 @@ class TestEmptyFlag:
            "model_input.sql": model_input_sql,
            "ephemeral_model_input.sql": ephemeral_model_input_sql,
            "model.sql": model_sql,
            "model_no_ephemeral_ref.sql": model_no_ephemeral_ref_sql,
            "sources.yml": schema_sources_yml,
            "unit_tests.yml": unit_tests_yml,
        }

    def assert_row_count(self, project, relation_name: str, expected_row_count: int):
@@ -54,9 +54,16 @@ models:
  - name: metricflow_time_spine
    time_spine:
      standard_granularity_column: date_day
      custom_granularities:
        - name: retail_month
        - name: martian_year
          column_name: martian__year_xyz
    columns:
      - name: date_day
        granularity: day
      - name: retail_month
      - name: martian__year_xyz

"""
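# Reading of the fixture above: a custom granularity column defaults to its name
# (retail_month), and column_name is only needed when the model column differs,
# as with martian_year -> martian__year_xyz.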
missing_time_spine_yml = """
@@ -76,7 +83,7 @@ models:
  - name: ts_second
"""

time_spine_missing_column_yml = """
time_spine_missing_standard_column_yml = """
models:
  - name: metricflow_time_spine_second
    time_spine:
@@ -84,3 +91,15 @@ models:
    columns:
      - name: date_day
"""

time_spine_missing_custom_column_yml = """
models:
  - name: metricflow_time_spine_second
    time_spine:
      standard_granularity_column: date_day
      custom_granularities:
        - name: retail_month
    columns:
      - name: date_day
        granularity: day
"""

@@ -13,8 +13,9 @@ from tests.functional.time_spines.fixtures import (
    metricflow_time_spine_sql,
    models_people_sql,
    semantic_model_people_yml,
    time_spine_missing_column_yml,
    time_spine_missing_custom_column_yml,
    time_spine_missing_granularity_yml,
    time_spine_missing_standard_column_yml,
    valid_time_spines_yml,
)

@@ -65,7 +66,18 @@ class TestValidTimeSpines:
                model.time_spine.standard_granularity_column
                == model_names_to_col_names[model.name]
            )
            assert len(model.columns) == 1
            if model.name == day_model_name:
                assert len(model.time_spine.custom_granularities) == 2
                assert {
                    custom_granularity.name
                    for custom_granularity in model.time_spine.custom_granularities
                } == {"retail_month", "martian_year"}
                for custom_granularity in model.time_spine.custom_granularities:
                    if custom_granularity.name == "martian_year":
                        assert custom_granularity.column_name == "martian__year_xyz"
            else:
                assert len(model.time_spine.custom_granularities) == 0
            assert len(model.columns) > 0
            assert (
                list(model.columns.values())[0].granularity
                == model_names_to_granularities[model.name]
@@ -152,8 +164,8 @@ class TestMissingTimeSpine:
            )


class TestTimeSpineColumnMissing:
    """Tests that YAML with time spine column not in model errors."""
class TestTimeSpineStandardColumnMissing:
    """Tests that YAML with time spine standard granularity column not in model errors."""

    @pytest.fixture(scope="class")
    def models(self):
@@ -162,7 +174,7 @@ class TestTimeSpineColumnMissing:
            "people.sql": models_people_sql,
            "metricflow_time_spine.sql": metricflow_time_spine_sql,
            "metricflow_time_spine_second.sql": metricflow_time_spine_second_sql,
            "time_spines.yml": time_spine_missing_column_yml,
            "time_spines.yml": time_spine_missing_standard_column_yml,
        }

    def test_time_spines(self, project):
@@ -175,6 +187,29 @@ class TestTimeSpineColumnMissing:
        )


class TestTimeSpineCustomColumnMissing:
    """Tests that YAML with time spine custom granularity column not in model errors."""

    @pytest.fixture(scope="class")
    def models(self):
        return {
            "semantic_model_people.yml": semantic_model_people_yml,
            "people.sql": models_people_sql,
            "metricflow_time_spine.sql": metricflow_time_spine_sql,
            "metricflow_time_spine_second.sql": metricflow_time_spine_second_sql,
            "time_spines.yml": time_spine_missing_custom_column_yml,
        }

    def test_time_spines(self, project):
        runner = dbtRunner()
        result = runner.invoke(["parse"])
        assert isinstance(result.exception, ParsingError)
        assert (
            "Time spine custom granularity columns do not exist in the model."
            in result.exception.msg
        )


class TestTimeSpineGranularityMissing:
    """Tests that YAML with time spine column without granularity errors."""
@@ -50,6 +50,18 @@ END a_is_null
FROM {{ ref('my_model_a') }}
"""

test_my_model_a_yml = """
models:
  - name: my_model_a
    columns:
      - name: a
        tests:
          - not_null
      - name: id
        tests:
          - not_null
"""

test_my_model_yml = """
unit_tests:
  - name: test_my_model
@@ -292,6 +304,28 @@ where event_time > (select max(event_time) from {{ this }})
{% endif %}
"""

my_incremental_model_with_alias_sql = """
{{
    config(
        materialized='incremental',
        alias='alias_name'
    )
}}

select * from {{ ref('events') }}
{% if is_incremental() %}
where event_time > (select max(event_time) from {{ this }})
{% endif %}
"""

my_incremental_model_versioned_yml = """
models:
  - name: my_incremental_model
    latest_version: 1
    versions:
      - v: 1
"""

test_my_model_incremental_yml_basic = """
unit_tests:
  - name: incremental_false

@@ -8,6 +8,8 @@ from fixtures import ( # noqa: F401
    external_package,
    external_package__accounts_seed_csv,
    my_incremental_model_sql,
    my_incremental_model_versioned_yml,
    my_incremental_model_with_alias_sql,
    my_model_a_sql,
    my_model_b_sql,
    my_model_sql,
@@ -76,11 +78,19 @@ class TestUnitTests:
        )
        assert len(results) == 1

        # Exclude unit tests with environment variable
        # Exclude unit tests with environment variable for build command
        os.environ["DBT_EXCLUDE_RESOURCE_TYPES"] = "unit_test"
        results = run_dbt(["build", "--select", "my_model"], expect_pass=True)
        assert len(results) == 1

        # Exclude unit tests with environment variable for test command
        results = run_dbt(["test", "--select", "my_model"], expect_pass=True)
        assert len(results) == 0

        # Exclude unit tests with environment variable for list command
        results = run_dbt(["list", "--select", "my_model"], expect_pass=True)
        assert len(results) == 1

        del os.environ["DBT_EXCLUDE_RESOURCE_TYPES"]

        # Test select by test name
@@ -263,6 +273,42 @@ unit_tests:
"""


class TestUnitTestIncrementalModelWithAlias:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "my_incremental_model.sql": my_incremental_model_with_alias_sql,
            "events.sql": event_sql,
            "schema.yml": test_my_model_incremental_yml_basic,
        }

    def test_basic(self, project):
        results = run_dbt(["run"])
        assert len(results) == 2

        # Select by model name
        results = run_dbt(["test", "--select", "my_incremental_model"], expect_pass=True)
        assert len(results) == 2


class TestUnitTestIncrementalModelWithVersion:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "my_incremental_model.sql": my_incremental_model_sql,
            "events.sql": event_sql,
            "schema.yml": my_incremental_model_versioned_yml + test_my_model_incremental_yml_basic,
        }

    def test_basic(self, project):
        results = run_dbt(["run"])
        assert len(results) == 2

        # Select by model name
        results = run_dbt(["test", "--select", "my_incremental_model"], expect_pass=True)
        assert len(results) == 2


class TestUnitTestExplicitSeed:
    @pytest.fixture(scope="class")
    def seeds(self):
91 tests/functional/unit_testing/test_ut_resource_types.py (Normal file)
@@ -0,0 +1,91 @@
import pytest
from fixtures import ( # noqa: F401
    my_model_a_sql,
    my_model_b_sql,
    my_model_sql,
    test_my_model_a_yml,
    test_my_model_pass_yml,
)

from dbt.tests.util import run_dbt

EXPECTED_MODELS = [
    "test.my_model",
    "test.my_model_a",
    "test.my_model_b",
]

EXPECTED_DATA_TESTS = [
    "test.not_null_my_model_a_a",
    "test.not_null_my_model_a_id",
]

EXPECTED_UNIT_TESTS = [
    "unit_test:test.test_my_model",
]
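# "unit_test:" appears to be the prefix dbt list uses to qualify unit test
# nodes, which is why only EXPECTED_UNIT_TESTS carries it.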
class TestUnitTestResourceTypes:
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "my_model.sql": my_model_sql,
            "my_model_a.sql": my_model_a_sql,
            "my_model_b.sql": my_model_b_sql,
            "test_my_model.yml": test_my_model_pass_yml,
            "test_my_model_a.yml": test_my_model_a_yml,
        }

    def test_unit_test_list(self, project):
        results = run_dbt(["run"])

        # unit tests
        results = run_dbt(["list", "--resource-type", "unit_test"])
        assert sorted(results) == EXPECTED_UNIT_TESTS

        results = run_dbt(["list", "--exclude-resource-types", "model", "test"])
        assert sorted(results) == EXPECTED_UNIT_TESTS

        results = run_dbt(["test", "--resource-type", "unit_test"])
        assert len(results) == len(EXPECTED_UNIT_TESTS)

        results = run_dbt(["test", "--exclude-resource-types", "model", "test"])
        assert len(results) == len(EXPECTED_UNIT_TESTS)

        # data tests
        results = run_dbt(["list", "--resource-type", "test"])
        assert sorted(results) == EXPECTED_DATA_TESTS

        results = run_dbt(["list", "--exclude-resource-types", "unit_test", "model"])
        assert sorted(results) == EXPECTED_DATA_TESTS

        results = run_dbt(["test", "--resource-type", "test"])
        assert len(results) == len(EXPECTED_DATA_TESTS)

        results = run_dbt(["test", "--exclude-resource-types", "unit_test", "model"])
        assert len(results) == len(EXPECTED_DATA_TESTS)

        results = run_dbt(["build", "--resource-type", "test"])
        assert len(results) == len(EXPECTED_DATA_TESTS)

        results = run_dbt(["build", "--exclude-resource-types", "unit_test", "model"])
        assert len(results) == len(EXPECTED_DATA_TESTS)

        # models
        results = run_dbt(["list", "--resource-type", "model"])
        assert sorted(results) == EXPECTED_MODELS

        results = run_dbt(["list", "--exclude-resource-type", "unit_test", "test"])
        assert sorted(results) == EXPECTED_MODELS

        results = run_dbt(["test", "--resource-type", "model"])
        assert len(results) == 0

        results = run_dbt(["test", "--exclude-resource-types", "unit_test", "test"])
        assert len(results) == 0

        results = run_dbt(["build", "--resource-type", "model"])
        assert len(results) == len(EXPECTED_MODELS)

        results = run_dbt(["build", "--exclude-resource-type", "unit_test", "test"])
        assert len(results) == len(EXPECTED_MODELS)
@@ -1,11 +1,8 @@
import os
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from dbt_common.events.base_types import BaseEvent, EventMsg
from typing import Optional


@contextmanager
@@ -20,16 +17,3 @@ def up_one(return_path: Optional[Path] = None):

def is_aware(dt: datetime) -> bool:
    return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None


@dataclass
class EventCatcher:
    event_to_catch: BaseEvent
    caught_events: List[EventMsg] = field(default_factory=list)

    def catch(self, event: EventMsg):
        if event.info.name == self.event_to_catch.__name__:
            self.caught_events.append(event)

    def flush(self) -> None:
        self.caught_events = []
@@ -31,7 +31,7 @@ class TestProjectMethods:
    def test_all_source_paths(self, project: Project):
        assert (
            project.all_source_paths.sort()
            == ["models", "seeds", "snapshots", "analyses", "macros"].sort()
            == ["models", "seeds", "snapshots", "analyses", "macros", "tests"].sort()
        )

    def test_generic_test_paths(self, project: Project):
@@ -99,7 +99,8 @@ class TestProjectInitialization(BaseConfigTest):
        self.assertEqual(project.test_paths, ["tests"])
        self.assertEqual(project.analysis_paths, ["analyses"])
        self.assertEqual(
            set(project.docs_paths), set(["models", "seeds", "snapshots", "analyses", "macros"])
            set(project.docs_paths),
            {"models", "seeds", "snapshots", "analyses", "macros", "tests"},
        )
        self.assertEqual(project.asset_paths, [])
        self.assertEqual(project.target_path, "target")
@@ -128,7 +129,7 @@ class TestProjectInitialization(BaseConfigTest):
        )
        self.assertEqual(
            set(project.docs_paths),
            set(["other-models", "seeds", "snapshots", "analyses", "macros"]),
            {"other-models", "seeds", "snapshots", "analyses", "macros", "tests"},
        )

    def test_all_overrides(self):

@@ -129,7 +129,7 @@ class TestRuntimeConfigFiles(BaseConfigTest):
        self.assertEqual(config.test_paths, ["tests"])
        self.assertEqual(config.analysis_paths, ["analyses"])
        self.assertEqual(
            set(config.docs_paths), set(["models", "seeds", "snapshots", "analyses", "macros"])
            set(config.docs_paths), {"models", "seeds", "snapshots", "analyses", "macros", "tests"}
        )
        self.assertEqual(config.asset_paths, [])
        self.assertEqual(config.target_path, "target")
@@ -1,11 +1,15 @@
import os
from unittest import mock

import pytest
from pytest_mock import MockerFixture

from dbt.adapters.base import BaseRelation
from dbt.artifacts.resources import Quoting
from dbt.artifacts.resources import NodeConfig, Quoting
from dbt.artifacts.resources.types import BatchSize
from dbt.context.providers import (
    BaseResolver,
    EventTimeFilter,
    RuntimeRefResolver,
    RuntimeSourceResolver,
)
@@ -34,6 +38,49 @@ class TestBaseResolver:

        assert resolver.resolve_limit == expected_resolve_limit

    @pytest.mark.parametrize(
        "dbt_experimental_microbatch,materialized,incremental_strategy,expect_filter",
        [
            (True, "incremental", "microbatch", True),
            (False, "incremental", "microbatch", False),
            (True, "table", "microbatch", False),
            (True, "incremental", "merge", False),
        ],
    )
    def test_resolve_event_time_filter(
        self,
        mocker: MockerFixture,
        resolver: ResolverSubclass,
        dbt_experimental_microbatch: bool,
        materialized: str,
        incremental_strategy: str,
        expect_filter: bool,
    ) -> None:
        if dbt_experimental_microbatch:
            mocker.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"})

        # Target mocking
        target = mock.Mock()
        target.config = mock.MagicMock(NodeConfig)
        target.config.event_time = "created_at"

        # Resolver mocking
        resolver.config.args.EVENT_TIME_END = None
        resolver.config.args.EVENT_TIME_START = None
        resolver.model.config = mock.MagicMock(NodeConfig)
        resolver.model.config.materialized = materialized
        resolver.model.config.incremental_strategy = incremental_strategy
        resolver.model.config.batch_size = BatchSize.day
        resolver.model.config.lookback = 0

        # Try to get an EventTimeFilter
        event_time_filter = resolver.resolve_event_time_filter(target=target)

        if expect_filter:
            assert isinstance(event_time_filter, EventTimeFilter)
        else:
            assert event_time_filter is None
|
||||
|
||||
class TestRuntimeRefResolver:
|
||||
@pytest.fixture
|
||||
|
||||
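A sketch of the gating this parametrization pins down (inferred from the four cases, not copied from the BaseResolver implementation): an event-time filter is only produced when the experimental flag is set, the model is incremental, and its strategy is microbatch.

def should_build_event_time_filter(flag_set: bool, materialized: str, strategy: str) -> bool:
    # matches the (flag, materialized, strategy) -> expect_filter rows above
    return flag_set and materialized == "incremental" and strategy == "microbatch"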
@@ -183,6 +183,7 @@ def basic_compiled_dict():
            "contract": {"enforced": False, "alias_types": True},
            "docs": {"show": True},
            "access": "protected",
            "lookback": 0,
        },
        "docs": {"show": True},
        "columns": {},

@@ -100,6 +100,7 @@ def populated_node_config_dict():
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
        "access": "protected",
        "lookback": 0,
    }


@@ -187,6 +188,7 @@ def base_parsed_model_dict():
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "access": "protected",
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
@@ -297,6 +299,7 @@ def complex_parsed_model_dict():
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "access": "protected",
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
@@ -520,6 +523,7 @@ def basic_parsed_seed_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "columns": {},
@@ -611,6 +615,7 @@ def complex_parsed_seed_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "columns": {
@@ -818,6 +823,7 @@ def base_parsed_hook_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
@@ -899,6 +905,7 @@ def complex_parsed_hook_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
@@ -1242,6 +1249,7 @@ def basic_timestamp_snapshot_config_dict():
        "quoting": {},
        "tags": [],
        "unique_key": "id",
        "snapshot_meta_column_names": {},
        "strategy": "timestamp",
        "updated_at": "last_update",
        "target_database": "some_snapshot_db",
@@ -1253,6 +1261,7 @@ def basic_timestamp_snapshot_config_dict():
        "packages": [],
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
        "lookback": 0,
    }


@@ -1277,6 +1286,7 @@ def complex_timestamp_snapshot_config_dict():
        "post-hook": [{"sql": 'insert into blah(a, b) select "1", 1', "transaction": True}],
        "pre-hook": [],
        "quoting": {},
        "snapshot_meta_column_names": {},
        "tags": [],
        "target_database": "some_snapshot_db",
        "target_schema": "some_snapshot_schema",
@@ -1291,6 +1301,7 @@ def complex_timestamp_snapshot_config_dict():
        "packages": [],
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
        "lookback": 0,
    }


@@ -1344,6 +1355,7 @@ def basic_check_snapshot_config_dict():
        "post-hook": [],
        "pre-hook": [],
        "quoting": {},
        "snapshot_meta_column_names": {},
        "tags": [],
        "target_database": "some_snapshot_db",
        "target_schema": "some_snapshot_schema",
@@ -1357,6 +1369,7 @@ def basic_check_snapshot_config_dict():
        "packages": [],
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
        "lookback": 0,
    }


@@ -1381,6 +1394,7 @@ def complex_set_snapshot_config_dict():
        "post-hook": [{"sql": 'insert into blah(a, b) select "1", 1', "transaction": True}],
        "pre-hook": [],
        "quoting": {},
        "snapshot_meta_column_names": {},
        "tags": [],
        "target_database": "some_snapshot_db",
        "target_schema": "some_snapshot_schema",
@@ -1395,6 +1409,7 @@ def complex_set_snapshot_config_dict():
        "packages": [],
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
        "lookback": 0,
    }


@@ -1506,6 +1521,7 @@ def basic_timestamp_snapshot_dict():
            "post-hook": [],
            "pre-hook": [],
            "quoting": {},
            "snapshot_meta_column_names": {},
            "tags": [],
            "target_database": "some_snapshot_db",
            "target_schema": "some_snapshot_schema",
@@ -1519,6 +1535,7 @@ def basic_timestamp_snapshot_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},
@@ -1608,6 +1625,7 @@ def basic_check_snapshot_dict():
            "post-hook": [],
            "pre-hook": [],
            "quoting": {},
            "snapshot_meta_column_names": {},
            "tags": [],
            "target_database": "some_snapshot_db",
            "target_schema": "some_snapshot_schema",
@@ -1621,6 +1639,7 @@ def basic_check_snapshot_dict():
            "docs": {"show": True},
            "contract": {"enforced": False, "alias_types": True},
            "packages": [],
            "lookback": 0,
        },
        "docs": {"show": True},
        "contract": {"enforced": False, "alias_types": True},

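The empty snapshot_meta_column_names entries above correspond to the new snapshot column-name configuration. A hedged sketch of what a populated value could look like in these fixtures; the four keys mirror dbt's default snapshot meta columns, while the right-hand names are invented purely for illustration:

# assumed shape: default meta column -> user-chosen replacement name
snapshot_meta_column_names = {
    "dbt_valid_from": "valid_from_at",
    "dbt_valid_to": "valid_to_at",
    "dbt_scd_id": "scd_id",
    "dbt_updated_at": "updated_at",
}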
@@ -23,7 +23,7 @@ class TestSetupEventLogger:

        setup_event_logger(get_flags())
        assert len(manager.loggers) == 0
        assert len(manager.callbacks) == 0
        assert len(manager.callbacks) == 1  # snowplow tracker for behavior flags

    def test_specify_max_bytes(
        self,

@@ -15,7 +15,7 @@ from dbt.artifacts.resources import (
)
from dbt.artifacts.resources.v1.semantic_model import NodeRelation
from dbt.contracts.graph.model_config import TestConfig
from dbt.contracts.graph.nodes import ColumnInfo, ModelNode, SemanticModel
from dbt.contracts.graph.nodes import ColumnInfo, ModelNode, ParsedNode, SemanticModel
from dbt.node_types import NodeType
from dbt_common.contracts.constraints import (
    ColumnLevelConstraint,
@@ -391,3 +391,35 @@ def test_disabled_unique_combo_multiple():

def assertSameContents(list1, list2):
    assert sorted(list1) == sorted(list2)


class TestParsedNode:
    @pytest.fixture(scope="class")
    def parsed_node(self) -> ParsedNode:
        return ParsedNode(
            resource_type=NodeType.Model,
            unique_id="model.test_package.test_name",
            name="test_name",
            package_name="test_package",
            schema="test_schema",
            alias="test_alias",
            fqn=["models", "test_name"],
            original_file_path="test_original_file_path",
            checksum=FileHash.from_contents("checksum"),
            path="test_path.sql",
            database=None,
        )

    def test_get_target_write_path(self, parsed_node):
        write_path = parsed_node.get_target_write_path("target_path", "subdirectory")
        assert (
            write_path
            == "target_path/subdirectory/test_package/test_original_file_path/test_path.sql"
        )

    def test_get_target_write_path_split(self, parsed_node):
        write_path = parsed_node.get_target_write_path("target_path", "subdirectory", "split")
        assert (
            write_path
            == "target_path/subdirectory/test_package/test_original_file_path/test_path/test_path_split.sql"
        )

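The split variant backs the per-batch compiled/run files for microbatch models: each batch's SQL lands in a directory named after the model file. A sketch of the path rule these two assertions pin down (an inference from the expected strings, not the actual implementation):

import os

def sketch_target_write_path(target, subdir, package, original_file_path, path, suffix=None):
    base = os.path.join(target, subdir, package, original_file_path)
    if suffix is None:
        return os.path.join(base, path)
    # e.g. test_path.sql + "split" -> test_path/test_path_split.sql
    stem, ext = os.path.splitext(path)
    return os.path.join(base, stem, f"{stem}_{suffix}{ext}")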
tests/unit/materializations/incremental/test_microbatch.py (new file, 466 lines)
@@ -0,0 +1,466 @@
from datetime import datetime
from unittest import mock

import pytest
import pytz
from freezegun import freeze_time

from dbt.artifacts.resources import NodeConfig
from dbt.artifacts.resources.types import BatchSize
from dbt.materializations.incremental.microbatch import MicrobatchBuilder

MODEL_CONFIG_BEGIN = datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC)


class TestMicrobatchBuilder:
    @pytest.fixture(scope="class")
    def microbatch_model(self):
        model = mock.Mock()
        model.config = mock.MagicMock(NodeConfig)
        model.config.materialized = "incremental"
        model.config.incremental_strategy = "microbatch"
        model.config.begin = MODEL_CONFIG_BEGIN

        return model

    @freeze_time("2024-09-05 08:56:00")
    @pytest.mark.parametrize(
        "is_incremental,event_time_end,expected_end_time",
        [
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
            ),
            (
                False,
                datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
                datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                True,
                datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
                datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
            ),
        ],
    )
    def test_build_end_time(
        self, microbatch_model, is_incremental, event_time_end, expected_end_time
    ):
        microbatch_builder = MicrobatchBuilder(
            model=microbatch_model,
            is_incremental=is_incremental,
            event_time_start=None,
            event_time_end=event_time_end,
        )

        assert microbatch_builder.build_end_time() == expected_end_time

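The four cases reduce to a single rule: an explicit event_time_end wins, otherwise the end of the run window is "now" in UTC (hence the freeze_time pin). A sketch of that rule as inferred from the expectations, not copied from MicrobatchBuilder:

from datetime import datetime
import pytz

def sketch_build_end_time(event_time_end):
    # inferred: explicit end wins; is_incremental does not matter here
    return event_time_end or datetime.now(pytz.UTC)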
    @pytest.mark.parametrize(
        "is_incremental,event_time_start,checkpoint,batch_size,lookback,expected_start_time",
        [
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                0,
                # is_incremental: False => model.config.begin
                MODEL_CONFIG_BEGIN,
            ),
            # BatchSize.year
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                0,
                datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                # Offset not applied when event_time_start provided
                1,
                datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                0,
                # is_incremental=False + no start_time -> model.config.begin
                MODEL_CONFIG_BEGIN,
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                0,
                datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                1,
                datetime(2023, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            # BatchSize.month
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                0,
                datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                # Offset not applied when event_time_start provided
                1,
                datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                0,
                # is_incremental=False + no start_time -> model.config.begin
                MODEL_CONFIG_BEGIN,
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                0,
                datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                1,
                datetime(2024, 8, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            # BatchSize.day
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                0,
                datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                # Offset not applied when event_time_start provided
                1,
                datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                0,
                # is_incremental=False + no start_time -> model.config.begin
                MODEL_CONFIG_BEGIN,
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                0,
                datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                1,
                datetime(2024, 9, 4, 0, 0, 0, 0, pytz.UTC),
            ),
            # BatchSize.hour
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                0,
                datetime(2024, 9, 5, 8, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                # Offset not applied when event_time_start provided
                1,
                datetime(2024, 9, 5, 8, 0, 0, 0, pytz.UTC),
            ),
            (
                False,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                0,
                # is_incremental=False + no start_time -> model.config.begin
                MODEL_CONFIG_BEGIN,
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                0,
                datetime(2024, 9, 5, 8, 0, 0, 0, pytz.UTC),
            ),
            (
                True,
                None,
                datetime(2024, 9, 5, 8, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                1,
                datetime(2024, 9, 5, 7, 0, 0, 0, pytz.UTC),
            ),
        ],
    )
    def test_build_start_time(
        self,
        microbatch_model,
        is_incremental,
        event_time_start,
        checkpoint,
        batch_size,
        lookback,
        expected_start_time,
    ):
        microbatch_model.config.batch_size = batch_size
        microbatch_model.config.lookback = lookback
        microbatch_builder = MicrobatchBuilder(
            model=microbatch_model,
            is_incremental=is_incremental,
            event_time_start=event_time_start,
            event_time_end=None,
        )

        assert microbatch_builder.build_start_time(checkpoint) == expected_start_time

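The cases above encode three rules: an explicit event_time_start is truncated to the batch boundary and ignores lookback; a non-incremental run with no explicit start falls back to the required begin config; an incremental run steps back lookback batches from the truncated checkpoint. A sketch stitched together from the class's own static helpers (an inference from the expectations, not the implementation):

def sketch_build_start_time(begin, is_incremental, event_time_start, checkpoint, batch_size, lookback):
    if event_time_start is not None:
        # explicit start is snapped to the batch boundary; lookback is ignored
        return MicrobatchBuilder.truncate_timestamp(event_time_start, batch_size)
    if not is_incremental:
        # initial builds start from the model's required 'begin' config
        return begin
    # incremental builds step back 'lookback' batches from the checkpoint boundary
    checkpoint = MicrobatchBuilder.truncate_timestamp(checkpoint, batch_size)
    return MicrobatchBuilder.offset_timestamp(checkpoint, batch_size, -lookback)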
    @pytest.mark.parametrize(
        "start,end,batch_size,expected_batches",
        [
            # BatchSize.year
            (
                datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
                datetime(2026, 1, 7, 3, 56, 0, 0, pytz.UTC),
                BatchSize.year,
                [
                    (
                        datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2025, 1, 1, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2025, 1, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2026, 1, 1, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2026, 1, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2026, 1, 7, 3, 56, 0, 0, pytz.UTC),
                    ),
                ],
            ),
            # BatchSize.month
            (
                datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
                datetime(2024, 11, 7, 3, 56, 0, 0, pytz.UTC),
                BatchSize.month,
                [
                    (
                        datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 11, 1, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 11, 1, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 11, 7, 3, 56, 0, 0, pytz.UTC),
                    ),
                ],
            ),
            # BatchSize.day
            (
                datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
                datetime(2024, 9, 7, 3, 56, 0, 0, pytz.UTC),
                BatchSize.day,
                [
                    (
                        datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 6, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 9, 6, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 7, 0, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 9, 7, 0, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 7, 3, 56, 0, 0, pytz.UTC),
                    ),
                ],
            ),
            # BatchSize.hour
            (
                datetime(2024, 9, 5, 1, 0, 0, 0, pytz.UTC),
                datetime(2024, 9, 5, 3, 56, 0, 0, pytz.UTC),
                BatchSize.hour,
                [
                    (
                        datetime(2024, 9, 5, 1, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 5, 2, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 9, 5, 2, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 5, 3, 0, 0, 0, pytz.UTC),
                    ),
                    (
                        datetime(2024, 9, 5, 3, 0, 0, 0, pytz.UTC),
                        datetime(2024, 9, 5, 3, 56, 0, 0, pytz.UTC),
                    ),
                ],
            ),
        ],
    )
    def test_build_batches(self, microbatch_model, start, end, batch_size, expected_batches):
        microbatch_model.config.batch_size = batch_size
        microbatch_builder = MicrobatchBuilder(
            model=microbatch_model, is_incremental=True, event_time_start=None, event_time_end=None
        )

        actual_batches = microbatch_builder.build_batches(start, end)
        assert len(actual_batches) == len(expected_batches)
        assert actual_batches == expected_batches

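Each expected value is a contiguous chain of (start, end) pairs whose final entry is capped at the overall end of the window. A sketch of that loop in terms of the same helper the class exposes (inferred from the expected batches, not the implementation):

def sketch_build_batches(start, end, batch_size):
    batches = []
    batch_start = start
    while batch_start < end:
        # step one batch forward, but never past the overall end of the window
        batch_end = min(MicrobatchBuilder.offset_timestamp(batch_start, batch_size, 1), end)
        batches.append((batch_start, batch_end))
        batch_start = batch_end
    return batches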
    @pytest.mark.parametrize(
        "timestamp,batch_size,offset,expected_timestamp",
        [
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.year,
                1,
                datetime(2025, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.year,
                -1,
                datetime(2023, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.month,
                1,
                datetime(2024, 10, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.month,
                -1,
                datetime(2024, 8, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.day,
                1,
                datetime(2024, 9, 6, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.day,
                -1,
                datetime(2024, 9, 4, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.hour,
                1,
                datetime(2024, 9, 5, 4, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.hour,
                -1,
                datetime(2024, 9, 5, 2, 0, 0, 0, pytz.UTC),
            ),
        ],
    )
    def test_offset_timestamp(self, timestamp, batch_size, offset, expected_timestamp):
        assert (
            MicrobatchBuilder.offset_timestamp(timestamp, batch_size, offset) == expected_timestamp
        )

    @pytest.mark.parametrize(
        "timestamp,batch_size,expected_timestamp",
        [
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.year,
                datetime(2024, 1, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.month,
                datetime(2024, 9, 1, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.day,
                datetime(2024, 9, 5, 0, 0, 0, 0, pytz.UTC),
            ),
            (
                datetime(2024, 9, 5, 3, 56, 1, 1, pytz.UTC),
                BatchSize.hour,
                datetime(2024, 9, 5, 3, 0, 0, 0, pytz.UTC),
            ),
        ],
    )
    def test_truncate_timestamp(self, timestamp, batch_size, expected_timestamp):
        assert MicrobatchBuilder.truncate_timestamp(timestamp, batch_size) == expected_timestamp

    @pytest.mark.parametrize(
        "batch_size,batch_start,expected_formatted_batch_start",
        [
            (None, None, None),
            (BatchSize.year, datetime(2020, 1, 1, 1), "2020-01-01"),
            (BatchSize.month, datetime(2020, 1, 1, 1), "2020-01-01"),
            (BatchSize.day, datetime(2020, 1, 1, 1), "2020-01-01"),
            (BatchSize.hour, datetime(2020, 1, 1, 1), "2020-01-01 01:00:00"),
        ],
    )
    def test_format_batch_start(self, batch_size, batch_start, expected_formatted_batch_start):
        assert (
            MicrobatchBuilder.format_batch_start(batch_start, batch_size)
            == expected_formatted_batch_start
        )
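Taken together, the offset/truncate/format cases describe simple calendar arithmetic on batch boundaries. A sketch of the formatting rule alone, inferred from the expected strings rather than the implementation:

from datetime import datetime
from dbt.artifacts.resources.types import BatchSize

def sketch_format_batch_start(batch_start, batch_size):
    if batch_start is None:
        return None
    # hourly batches keep the time component; coarser sizes print only the date
    return str(batch_start) if batch_size == BatchSize.hour else str(batch_start.date())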
tests/unit/test_behavior_flags.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import pytest

from dbt.tracking import (
    disable_tracking,
    initialize_from_flags,
    track_behavior_change_warn,
)
from dbt_common.behavior_flags import Behavior
from dbt_common.events.event_manager_client import (
    add_callback_to_manager,
    cleanup_event_logger,
)


@pytest.fixture
def snowplow_tracker(mocker):
    # initialize `active_user` without writing the cookie to disk
    initialize_from_flags(True, "")
    mocker.patch("dbt.tracking.User.set_cookie").return_value = {"id": 42}

    # add the relevant callback to the event manager
    add_callback_to_manager(track_behavior_change_warn)

    # don't make a call, catch the request
    # to avoid confusion, this is snowplow_tracker's track, not our wrapper that's also named track
    snowplow_tracker = mocker.patch("dbt.tracking.tracker.track")

    yield snowplow_tracker

    # teardown
    cleanup_event_logger()
    disable_tracking()


def test_false_evaluation_triggers_snowplow_tracking(snowplow_tracker):
    behavior = Behavior(
        [{"name": "my_flag", "default": False, "description": "This is a false flag."}], {}
    )
    if behavior.my_flag:
        # trigger a False evaluation
        assert False, "This flag should evaluate to false and skip this line"
    assert snowplow_tracker.called


def test_true_evaluation_does_not_trigger_snowplow_tracking(snowplow_tracker):
    behavior = Behavior(
        [{"name": "my_flag", "default": True, "description": "This is a true flag."}], {}
    )
    if behavior.my_flag:
        pass
    else:
        # trigger a True evaluation
        assert False, "This flag should evaluate to true and skip this line"
    assert not snowplow_tracker.called


def test_false_evaluation_does_not_trigger_snowplow_tracking_when_disabled(snowplow_tracker):
    disable_tracking()

    behavior = Behavior(
        [{"name": "my_flag", "default": False, "description": "This is a false flag."}], {}
    )
    if behavior.my_flag:
        # trigger a False evaluation
        assert False, "This flag should evaluate to false and skip this line"
    assert not snowplow_tracker.called
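What these three tests pin down: evaluating a behavior flag that resolves to False emits a behavior-change warning event, track_behavior_change_warn forwards it to the snowplow tracker, and disable_tracking() severs that link. A minimal sketch of the observable contract (assumed wiring, not the implementation):

from dbt_common.behavior_flags import Behavior

behavior = Behavior(
    [{"name": "require_x", "default": False, "description": "example flag"}], {}
)
bool(behavior.require_x)  # a False evaluation -> warning event -> tracker callback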