Mirror of https://github.com/dbt-labs/dbt-core (synced 2025-12-18 20:21:27 +00:00)

Compare commits: 45 commits, fix_spaces...callum_tes
| Author | SHA1 | Date |
|---|---|---|
| | ec1c9f0362 | |
| | caeecf4a67 | |
| | fcea5969ae | |
| | 672a7d7fc8 | |
| | a033aa0180 | |
| | 38991cd3a9 | |
| | 4f966b0d54 | |
| | d3eaa37212 | |
| | ce1759f793 | |
| | 4b3e797530 | |
| | 3e9ed1ff9b | |
| | 9b0dce32b8 | |
| | ce8e886f38 | |
| | 12d02dc9d1 | |
| | 72f59da9df | |
| | 0dd99eac83 | |
| | f0564f9d32 | |
| | 278e4c7673 | |
| | e3ec07d035 | |
| | c7aa2ed7ef | |
| | e49e259950 | |
| | 140597276b | |
| | 6712a5841a | |
| | f4356d8dd2 | |
| | 7c715c5625 | |
| | 5b9a24fd23 | |
| | 6378c13e7a | |
| | 2db94c5788 | |
| | f25c8f39fc | |
| | 3b8b191623 | |
| | 246fd66e8e | |
| | 817d39ac14 | |
| | 85e27ac747 | |
| | b5ca2e4c5f | |
| | e69b465c41 | |
| | 6937b321d6 | |
| | a6fc443abc | |
| | 340cae3b43 | |
| | 91c5e2cc86 | |
| | bafae0326b | |
| | 7e1b788bd8 | |
| | 1bd2fe09a1 | |
| | 5b0197635d | |
| | c1ad7b0f0e | |
| | 2da925aa25 | |
.changes/unreleased/Features-20230118-134804.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
kind: Features
body: Adding the entity node
time: 2023-01-18T13:48:04.487817-06:00
custom:
  Author: callum-mcdata
  Issue: "6627"
.gitignore (vendored, 1 line changed)
@@ -9,6 +9,7 @@ __pycache__/
# Distribution / packaging
.Python
env*/
.mf_dbt_venv
dbt_env/
build/
!core/dbt/docs/build
@@ -22,6 +22,18 @@ from dbt.task.build import BuildTask
from dbt.task.generate import GenerateTask
from dbt.task.init import InitTask

import importlib

metricflow_module = importlib.util.find_spec("metricflow")
if metricflow_module is not None:
    from metricflow.cli.main import (
        list_metrics,
        list_dimensions,
        get_dimension_values,
        query,
        validate_configs
    )


class dbtUsageException(Exception):
    pass

@@ -160,6 +172,14 @@ def clean(ctx, **kwargs):
    return results, success


# mf
@cli.group()
@click.pass_context
def mf(ctx, **kwargs):
    """Used to house the metricflow metrics"""
    pass


# dbt docs
@cli.group()
@click.pass_context

@@ -572,6 +592,12 @@ def freshness(ctx, **kwargs):
snapshot_freshness = copy(cli.commands["source"].commands["freshness"])  # type: ignore
snapshot_freshness.hidden = True
cli.commands["source"].add_command(snapshot_freshness, "snapshot-freshness")  # type: ignore
if metricflow_module is not None:
    cli.add_command(list_metrics, "list-metrics")
    cli.add_command(list_dimensions, "list-dimensions")
    cli.add_command(get_dimension_values, "get-dimension-values")
    cli.add_command(query, "query")
    cli.add_command(validate_configs, "validate-configs")


# dbt test

@@ -612,6 +638,32 @@ def test(ctx, **kwargs):
    return results, success


# dbt validate
# @cli.command("validate")
# @click.pass_context
# @p.args
# @p.profile
# @p.profiles_dir
# @p.project_dir
# @p.target
# @p.vars
# @requires.preflight
# @requires.profile
# @requires.project
# @requires.runtime_config
# @requires.manifest
# def validate(ctx, **kwargs):
#     """Validates the semantic layer"""
#     task = ValidateTask(
#         ctx.obj["flags"],
#         ctx.obj["runtime_config"],
#         ctx.obj["manifest"],
#     )
#     results = task.run()
#     success = task.interpret_results(results)
#     return results, success


# Support running as a module
if __name__ == "__main__":
    cli()
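The hunks above only wire in the MetricFlow commands when that package is importable. A minimal, self-contained sketch of the same optional-dependency pattern; the package name `some_optional_package` and the command names are illustrative, not the real dbt CLI surface:

```python
import importlib.util

import click

# Detect an optional dependency without importing it;
# find_spec returns None when the package is not installed.
HAS_EXTRA = importlib.util.find_spec("some_optional_package") is not None


@click.group()
def cli():
    """Toy CLI mirroring the guarded-registration pattern."""


@cli.command("always-available")
def always_available():
    click.echo("core command")


if HAS_EXTRA:
    # Only register the extra command when the dependency exists,
    # so `cli --help` never fails on a missing import.
    @cli.command("extra")
    def extra():
        click.echo("optional command")


if __name__ == "__main__":
    cli()
```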
@@ -37,7 +37,7 @@ cache_selected_only = click.option(
compile_docs = click.option(
    "--compile/--no-compile",
    envvar=None,
    help="Wether or not to run 'dbt compile' as part of docs generation",
    help="Whether or not to run 'dbt compile' as part of docs generation",
    default=True,
)
@@ -50,6 +50,7 @@ def print_compile_stats(stats):
        NodeType.Source: "source",
        NodeType.Exposure: "exposure",
        NodeType.Metric: "metric",
        NodeType.Entity: "entity",
        NodeType.Group: "group",
    }

@@ -86,6 +87,8 @@ def _generate_stats(manifest: Manifest):
        stats[exposure.resource_type] += 1
    for metric in manifest.metrics.values():
        stats[metric.resource_type] += 1
    for entity in manifest.entities.values():
        stats[entity.resource_type] += 1
    for macro in manifest.macros.values():
        stats[macro.resource_type] += 1
    for group in manifest.groups.values():

@@ -402,6 +405,8 @@ class Compiler:
            linker.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
        elif dependency in manifest.metrics:
            linker.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
        elif dependency in manifest.entities:
            linker.dependency(node.unique_id, (manifest.entities[dependency].unique_id))
        else:
            raise GraphDependencyNotFoundError(node, dependency)

@@ -414,6 +419,8 @@ class Compiler:
            self.link_node(linker, exposure, manifest)
        for metric in manifest.metrics.values():
            self.link_node(linker, metric, manifest)
        for entity in manifest.entities.values():
            self.link_node(linker, entity, manifest)

        cycle = linker.find_cycles()
@@ -394,6 +394,7 @@ class PartialProject(RenderComponents):
    sources: Dict[str, Any]
    tests: Dict[str, Any]
    metrics: Dict[str, Any]
    entities: Dict[str, Any]
    exposures: Dict[str, Any]
    vars_value: VarProvider

@@ -404,6 +405,7 @@ class PartialProject(RenderComponents):
        sources = cfg.sources
        tests = cfg.tests
        metrics = cfg.metrics
        entities = cfg.entities
        exposures = cfg.exposures
        if cfg.vars is None:
            vars_dict: Dict[str, Any] = {}

@@ -459,6 +461,7 @@ class PartialProject(RenderComponents):
            sources=sources,
            tests=tests,
            metrics=metrics,
            entities=entities,
            exposures=exposures,
            vars=vars_value,
            config_version=cfg.config_version,

@@ -563,6 +566,7 @@ class Project:
    sources: Dict[str, Any]
    tests: Dict[str, Any]
    metrics: Dict[str, Any]
    entities: Dict[str, Any]
    exposures: Dict[str, Any]
    vars: VarProvider
    dbt_version: List[VersionSpecifier]

@@ -637,6 +641,7 @@ class Project:
            "sources": self.sources,
            "tests": self.tests,
            "metrics": self.metrics,
            "entities": self.entities,
            "exposures": self.exposures,
            "vars": self.vars.to_dict(),
            "require-dbt-version": [v.to_version_string() for v in self.dbt_version],

@@ -165,6 +165,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
            sources=project.sources,
            tests=project.tests,
            metrics=project.metrics,
            entities=project.entities,
            exposures=project.exposures,
            vars=project.vars,
            config_version=project.config_version,

@@ -314,6 +315,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
            "sources": self._get_config_paths(self.sources),
            "tests": self._get_config_paths(self.tests),
            "metrics": self._get_config_paths(self.metrics),
            "entities": self._get_config_paths(self.entities),
            "exposures": self._get_config_paths(self.exposures),
        }
@@ -30,6 +30,9 @@ import datetime
import re
import itertools

import importlib

metricflow_module = importlib.util.find_spec("metricflow")

# See the `contexts` module README for more information on how contexts work

@@ -51,6 +54,11 @@ def get_re_module_context() -> Dict[str, Any]:

    return {name: getattr(re, name) for name in context_exports}


if metricflow_module is not None:

    def get_metricflow_module_context() -> Dict[str, Any]:
        from metricflow.api.metricflow_client import MetricFlowClient

        context_exports = ["explain"]
        return {name: getattr(MetricFlowClient, name) for name in context_exports}


def get_itertools_module_context() -> Dict[str, Any]:
    # Excluded dropwhile, filterfalse, takewhile and groupby;
@@ -45,6 +45,8 @@ class UnrenderedConfig(ConfigSource):
            model_configs = unrendered.get("tests")
        elif resource_type == NodeType.Metric:
            model_configs = unrendered.get("metrics")
        elif resource_type == NodeType.Entity:
            model_configs = unrendered.get("entities")
        elif resource_type == NodeType.Exposure:
            model_configs = unrendered.get("exposures")
        else:

@@ -70,6 +72,8 @@ class RenderedConfig(ConfigSource):
            model_configs = self.project.tests
        elif resource_type == NodeType.Metric:
            model_configs = self.project.metrics
        elif resource_type == NodeType.Entity:
            model_configs = self.project.entities
        elif resource_type == NodeType.Exposure:
            model_configs = self.project.exposures
        else:
@@ -33,12 +33,14 @@ from dbt.contracts.graph.nodes import (
    Macro,
    Exposure,
    Metric,
    Entity,
    SeedNode,
    SourceDefinition,
    Resource,
    ManifestNode,
)
from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
from dbt.contracts.graph.metrics import MetricReference
from dbt.contracts.graph.entities import EntityReference, ResolvedEntityReference
from dbt.events.functions import get_metadata_vars
from dbt.exceptions import (
    CompilationError,

@@ -54,6 +56,7 @@ from dbt.exceptions import (
    MacroDispatchArgError,
    MacrosSourcesUnWriteableError,
    MetricArgsError,
    EntityArgsError,
    MissingConfigError,
    OperationsCannotRefEphemeralNodesError,
    PackageNotInDepsError,

@@ -206,7 +209,7 @@ class BaseResolver(metaclass=abc.ABCMeta):
        return self.db_wrapper.Relation

    @abc.abstractmethod
    def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
    def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference, EntityReference]:
        pass
@@ -305,6 +308,41 @@ class BaseMetricResolver(BaseResolver):
        return self.resolve(name, package)


class BaseEntityResolver(BaseResolver):
    def resolve(self, name: str, package: Optional[str] = None) -> EntityReference:
        ...

    def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
        if package is None:
            return [name]
        else:
            return [package, name]

    def validate_args(self, name: str, package: Optional[str]):
        if not isinstance(name, str):
            raise CompilationError(
                f"The name argument to entity() must be a string, got {type(name)}"
            )

        if package is not None and not isinstance(package, str):
            raise CompilationError(
                f"The package argument to entity() must be a string or None, got {type(package)}"
            )

    def __call__(self, *args: str) -> EntityReference:
        name: str
        package: Optional[str] = None

        if len(args) == 1:
            name = args[0]
        elif len(args) == 2:
            package, name = args
        else:
            raise EntityArgsError(node=self.model, args=args)
        self.validate_args(name, package)
        return self.resolve(name, package)


class Config(Protocol):
    def __init__(self, model, context_config: Optional[ContextConfig]):
        ...
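For context, the resolver above accepts either `entity('orders')` or `entity('my_package', 'orders')`. A standalone sketch of that one-or-two-argument dispatch and validation, using plain exceptions instead of dbt's error classes (the helper names here are assumed for illustration):

```python
from typing import List, Optional, Tuple


def parse_entity_args(*args: str) -> Tuple[str, Optional[str]]:
    """Return (name, package) for entity('name') or entity('package', 'name')."""
    package: Optional[str] = None
    if len(args) == 1:
        name = args[0]
    elif len(args) == 2:
        package, name = args
    else:
        raise ValueError(f"entity() takes 1 or 2 arguments, got {len(args)}")

    if not isinstance(name, str):
        raise TypeError(f"The name argument to entity() must be a string, got {type(name)}")
    if package is not None and not isinstance(package, str):
        raise TypeError(f"The package argument to entity() must be a string or None, got {type(package)}")
    return name, package


def repack_args(name: str, package: Optional[str]) -> List[str]:
    """Mirror of _repack_args: store [name] or [package, name] on the node."""
    return [name] if package is None else [package, name]


assert parse_entity_args("orders") == ("orders", None)
name, package = parse_entity_args("my_package", "orders")
assert repack_args(name, package) == ["my_package", "orders"]
```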
@@ -546,23 +584,38 @@ class ParseMetricResolver(BaseMetricResolver):


class RuntimeMetricResolver(BaseMetricResolver):
    def resolve(self, target_name: str, target_package: Optional[str] = None) -> MetricReference:
        target_metric = self.manifest.resolve_metric(
    def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
        self.model.metrics.append(self._repack_args(name, package))

        return MetricReference(name, package)


# `metric` implementations
class ParseEntityResolver(BaseEntityResolver):
    def resolve(self, name: str, package: Optional[str] = None) -> EntityReference:
        self.model.entities.append(self._repack_args(name, package))

        return EntityReference(name, package)


class RuntimeEntityResolver(BaseEntityResolver):
    def resolve(self, target_name: str, target_package: Optional[str] = None) -> EntityReference:
        target_entity = self.manifest.resolve_entity(
            target_name,
            target_package,
            self.current_project,
            self.model.package_name,
        )

        if target_metric is None or isinstance(target_metric, Disabled):
        if target_entity is None or isinstance(target_entity, Disabled):
            raise TargetNotFoundError(
                node=self.model,
                target_name=target_name,
                target_kind="metric",
                target_kind="entity",
                target_package=target_package,
            )

        return ResolvedMetricReference(target_metric, self.manifest, self.Relation)
        return ResolvedEntityReference(target_entity, self.manifest, self.Relation)


# `var` implementations.
@@ -623,6 +676,7 @@ class Provider(Protocol):
    ref: Type[BaseRefResolver]
    source: Type[BaseSourceResolver]
    metric: Type[BaseMetricResolver]
    entity: Type[BaseEntityResolver]


class ParseProvider(Provider):

@@ -633,6 +687,7 @@ class ParseProvider(Provider):
    ref = ParseRefResolver
    source = ParseSourceResolver
    metric = ParseMetricResolver
    entity = ParseEntityResolver


class GenerateNameProvider(Provider):

@@ -643,6 +698,7 @@ class GenerateNameProvider(Provider):
    ref = ParseRefResolver
    source = ParseSourceResolver
    metric = ParseMetricResolver
    entity = ParseEntityResolver


class RuntimeProvider(Provider):

@@ -653,6 +709,7 @@ class RuntimeProvider(Provider):
    ref = RuntimeRefResolver
    source = RuntimeSourceResolver
    metric = RuntimeMetricResolver
    entity = RuntimeEntityResolver


class OperationProvider(RuntimeProvider):

@@ -847,6 +904,10 @@ class ProviderContext(ManifestContext):
    def metric(self) -> Callable:
        return self.provider.metric(self.db_wrapper, self.model, self.config, self.manifest)

    @contextproperty
    def entity(self) -> Callable:
        return self.provider.entity(self.db_wrapper, self.model, self.config, self.manifest)

    @contextproperty("config")
    def ctx_config(self) -> Config:
        """The `config` variable exists to handle end-user configuration for
@@ -1431,6 +1492,14 @@ class ExposureMetricResolver(BaseResolver):
        return ""


class ExposureEntityResolver(BaseResolver):
    def __call__(self, *args) -> str:
        if len(args) not in (1, 2):
            raise EntityArgsError(node=self.model, args=args)
        self.model.entities.append(list(args))
        return ""


def generate_parse_exposure(
    exposure: Exposure,
    config: RuntimeConfig,

@@ -1457,6 +1526,12 @@ def generate_parse_exposure(
            project,
            manifest,
        ),
        "entity": ExposureEntityResolver(
            None,
            exposure,
            project,
            manifest,
        ),
    }
@@ -1501,6 +1576,57 @@ def generate_parse_metrics(
            project,
            manifest,
        ),
        "entity": ParseEntityResolver(
            None,
            metric,
            project,
            manifest,
        ),
    }


class EntityRefResolver(BaseResolver):
    def __call__(self, *args) -> str:
        package = None
        if len(args) == 1:
            name = args[0]
        elif len(args) == 2:
            package, name = args
        else:
            raise RefArgsError(node=self.model, args=args)
        self.validate_args(name, package)
        self.model.refs.append(list(args))
        return ""

    def validate_args(self, name, package):
        if not isinstance(name, str):
            raise ParsingError(
                f"In the entity associated with {self.model.original_file_path} "
                "the name argument to ref() must be a string"
            )


def generate_parse_entities(
    entity: Entity,
    config: RuntimeConfig,
    manifest: Manifest,
    package_name: str,
) -> Dict[str, Any]:
    project = config.load_dependencies()[package_name]
    return {
        "ref": EntityRefResolver(
            None,
            entity,
            project,
            manifest,
        ),
        # An entity cannot reference another entity so we comment out this section
        # "entity": ParseEntityResolver(
        #     None,
        #     entity,
        #     project,
        #     manifest,
        # ),
    }
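generate_parse_entities builds a small Jinja rendering context in which only `ref()` is resolvable while an entity definition is parsed; the resolver records the call and renders to an empty string. A rough sketch of that capture-only behaviour using plain `jinja2` (the real implementation goes through dbt's own Jinja environment, so this is only an analogy):

```python
from jinja2 import Environment

captured_refs = []


def ref(*args):
    """Record the ref() call during parsing and render to an empty string."""
    if len(args) not in (1, 2):
        raise ValueError(f"ref() takes 1 or 2 arguments, got {len(args)}")
    captured_refs.append(list(args))
    return ""


env = Environment()
template = env.from_string("select * from {{ ref('orders') }}")
rendered = template.render(ref=ref)

assert captured_refs == [["orders"]]
assert rendered == "select * from "
```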
@@ -227,6 +227,7 @@ class SchemaSourceFile(BaseSourceFile):
    sources: List[str] = field(default_factory=list)
    exposures: List[str] = field(default_factory=list)
    metrics: List[str] = field(default_factory=list)
    entities: List[str] = field(default_factory=list)
    groups: List[str] = field(default_factory=list)
    # node patches contain models, seeds, snapshots, analyses
    ndp: List[str] = field(default_factory=list)
core/dbt/contracts/graph/dimensions.py (new file, 91 lines)
@@ -0,0 +1,91 @@
from __future__ import annotations
from dbt.contracts.util import Mergeable
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List
from dbt.semantic.time import TimeGranularity
from dbt.semantic.references import DimensionReference, TimeDimensionReference


class DimensionType(StrEnum):
    CATEGORICAL = "categorical"
    TIME = "time"

    def is_time_type(self) -> bool:
        """Checks if this type of dimension is a time type"""
        return self in [DimensionType.TIME]


@dataclass
class DimensionValidityParams(dbtClassMixin, Mergeable):
    """Parameters identifying a given dimension as an identifier for validity state

    This construct is used for supporting SCD Type II tables, such as might be
    created via dbt's snapshot feature, or generated via periodic loads from external
    dimension data sources. In either of those cases, there is typically a time dimension
    associated with the SCD data source that indicates the start and end times of a
    validity window, where the dimension value is valid for any time within that range.
    """

    is_start: bool = False
    is_end: bool = False


@dataclass
class DimensionTypeParameters(dbtClassMixin, Mergeable):
    """This class contains the type parameters required for the semantic layer.
    The first iteration of this is specifically focused on time.

    Additionally we use the final two properties (start/end) for supporting SCD
    Type II tables, such as might be created via dbt's snapshot feature, or generated
    via periodic loads from external dimension data sources. In either of those cases,
    there is typically a time dimension associated with the SCD data source that
    indicates the start and end times of a validity window, where the dimension
    value is valid for any time within that range.

    TODO: Can we abstract from params and have these be first class??"""

    time_granularity: TimeGranularity
    is_primary: bool = False
    validity_params: Optional[DimensionValidityParams] = None


@dataclass
class Dimension(dbtClassMixin, Mergeable):
    """Each instance of this class represents a dimension in the associated entity."""

    name: str
    type: DimensionType
    type_params: Optional[DimensionTypeParameters] = None
    expr: Optional[str] = None
    is_partition: bool = False
    description: str = ""
    meta: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    config: Dict[str, Any] = field(default_factory=dict)

    @property
    def is_primary_time(self) -> bool:  # noqa: D
        if self.type == DimensionType.TIME and self.type_params is not None:
            return self.type_params.is_primary
        return False

    @property
    def reference(self) -> DimensionReference:  # noqa: D
        return DimensionReference(name=self.name)

    @property
    def time_dimension_reference(self) -> TimeDimensionReference:  # noqa: D
        assert (
            self.type == DimensionType.TIME
        ), f"Got type as {self.type} instead of {DimensionType.TIME}"
        return TimeDimensionReference(name=self.name)

    @property
    def validity_params(self) -> Optional[DimensionValidityParams]:
        """Returns the DimensionValidityParams property, if it exists.
        This is to avoid repeatedly checking that type params is not None before doing anything with ValidityParams
        """
        if self.type_params:
            return self.type_params.validity_params

        return None
core/dbt/contracts/graph/entities.py (new file, 72 lines)
@@ -0,0 +1,72 @@
from dbt.contracts.util import Mergeable
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass
from typing import Optional


class EntityReference(object):
    def __init__(self, entity_name, package_name=None):
        self.entity_name = entity_name
        self.package_name = package_name

    def __str__(self):
        return f"{self.entity_name}"


class ResolvedEntityReference(EntityReference):
    """
    Simple proxy over an Entity which delegates property
    lookups to the underlying node. Also adds helper functions
    for working with metrics (ie. __str__ and templating functions)
    """

    def __init__(self, node, manifest, Relation):
        super().__init__(node.name, node.package_name)
        self.node = node
        self.manifest = manifest
        self.Relation = Relation

    def __getattr__(self, key):
        return getattr(self.node, key)

    def __str__(self):
        return f"{self.node.name}"


class EntityMutabilityType(StrEnum):
    """How data at the physical layer is expected to behave"""

    UNKNOWN = "UNKNOWN"
    IMMUTABLE = "IMMUTABLE"  # never changes
    APPEND_ONLY = "APPEND_ONLY"  # appends along an orderable column
    DS_APPEND_ONLY = "DS_APPEND_ONLY"  # appends along daily column
    FULL_MUTATION = "FULL_MUTATION"  # no guarantees, everything may change


@dataclass
class EntityMutabilityTypeParams(dbtClassMixin, Mergeable):
    """Type params add additional context to mutability"""

    min: Optional[str] = None
    max: Optional[str] = None
    update_cron: Optional[str] = None
    along: Optional[str] = None


@dataclass
class EntityMutability(dbtClassMixin):
    """Describes the mutability properties of a data source"""

    type: EntityMutabilityType
    type_params: Optional[EntityMutabilityTypeParams] = None


class EntityOrigin(StrEnum):
    """Describes how data sources were created
    Impacts determination of validity and duration of storage
    """

    SOURCE = "source"  # "input" data sources
    DERIVED = (
        "derived"  # generated by the semantic layer originating (perhaps indirectly) from sources
    )
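ResolvedEntityReference above delegates any unknown attribute lookup to the wrapped node via `__getattr__`. A tiny standalone illustration of that proxy behaviour; `FakeNode` and `NodeProxy` are made up for the example and are not part of the diff:

```python
class FakeNode:
    """Stand-in for a parsed Entity node."""

    def __init__(self, name: str, package_name: str, description: str):
        self.name = name
        self.package_name = package_name
        self.description = description


class NodeProxy:
    """Delegates any attribute it does not define itself to the wrapped node."""

    def __init__(self, node):
        self.node = node

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails, so self.node is still reachable.
        return getattr(self.node, key)

    def __str__(self):
        return self.node.name


proxy = NodeProxy(FakeNode("customer", "my_project", "The customer entity"))
assert str(proxy) == "customer"
assert proxy.description == "The customer entity"  # delegated lookup
```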
core/dbt/contracts/graph/identifiers.py (new file, 74 lines)
@@ -0,0 +1,74 @@
from __future__ import annotations
from dbt.contracts.util import (
    Mergeable,
)
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from dbt.semantic.references import CompositeSubIdentifierReference, IdentifierReference


class IdentifierType(StrEnum):
    """Defines uniqueness and the extent to which an identifier represents the common entity for a data source"""

    FOREIGN = "foreign"
    NATURAL = "natural"
    PRIMARY = "primary"
    UNIQUE = "unique"


@dataclass
class CompositeSubIdentifier(dbtClassMixin):
    """CompositeSubIdentifiers either describe or reference the identifiers that comprise a composite identifier"""

    name: Optional[str] = None
    expr: Optional[str] = None
    ref: Optional[str] = None

    @property
    def reference(self) -> CompositeSubIdentifierReference:  # noqa: D
        assert (
            self.name
        ), f"The element name should have been set during model transformation. Got {self}"
        return CompositeSubIdentifierReference(name=self.name)


@dataclass
class Identifier(dbtClassMixin, Mergeable):
    """Describes an identifier"""

    name: str
    type: IdentifierType
    description: str = ""
    role: Optional[str] = None
    entity: Optional[str] = None
    identifiers: List[CompositeSubIdentifier] = field(default_factory=list)
    expr: Optional[str] = None
    meta: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    config: Dict[str, Any] = field(default_factory=dict)

    # Moved validation down to entity level. No more default_entity_value

    @property
    def is_primary_time(self) -> bool:  # noqa: D
        return False

    @property
    def is_composite(self) -> bool:  # noqa: D
        return self.identifiers is not None and len(self.identifiers) > 0

    @property
    def reference(self) -> IdentifierReference:  # noqa: D
        return IdentifierReference(name=self.name)

    @property
    def is_linkable_identifier_type(self) -> bool:
        """Indicates whether or not this identifier can be used as a linkable identifier type for joins
        That is, can you use the identifier as a linkable element in multi-hop dundered syntax. For example,
        the country dimension in the listings data source can be linked via listing__country, because listing
        is the primary key.
        At the moment, you may only request things accessible via primary, unique, or natural keys, with natural
        keys reserved for SCD Type II style data sources.
        """
        return self.type in (IdentifierType.PRIMARY, IdentifierType.UNIQUE, IdentifierType.NATURAL)
@@ -29,6 +29,7 @@ from dbt.contracts.graph.nodes import (
    GenericTestNode,
    Exposure,
    Metric,
    Entity,
    Group,
    UnpatchedSourceDefinition,
    ManifestNode,

@@ -36,6 +37,7 @@ from dbt.contracts.graph.nodes import (
    ResultNode,
    BaseNode,
)
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.util import BaseArtifactMetadata, SourceKey, ArtifactMixin, schema_version

@@ -213,6 +215,39 @@ class MetricLookup(dbtClassMixin):
        return manifest.metrics[unique_id]


class EntityLookup(dbtClassMixin):
    def __init__(self, manifest: "Manifest"):
        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, search_name, package: Optional[PackageName]):
        return find_unique_id_for_package(self.storage, search_name, package)

    def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
        unique_id = self.get_unique_id(search_name, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_entity(self, entity: Entity):
        if entity.search_name not in self.storage:
            self.storage[entity.search_name] = {}

        self.storage[entity.search_name][entity.package_name] = entity.unique_id

    def populate(self, manifest):
        for entity in manifest.entities.values():
            if hasattr(entity, "name"):
                self.add_entity(entity)

    def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> Entity:
        if unique_id not in manifest.entities:
            raise dbt.exceptions.DbtInternalError(
                f"Entity {unique_id} found in cache but not found in manifest"
            )
        return manifest.entities[unique_id]
|
||||
class DisabledLookup(dbtClassMixin):
|
||||
def __init__(self, manifest: "Manifest"):
|
||||
@@ -457,6 +492,9 @@ class Disabled(Generic[D]):
|
||||
MaybeMetricNode = Optional[Union[Metric, Disabled[Metric]]]
|
||||
|
||||
|
||||
MaybeEntityNode = Optional[Union[Entity, Disabled[Entity]]]
|
||||
|
||||
|
||||
MaybeDocumentation = Optional[Documentation]
|
||||
|
||||
|
||||
@@ -600,6 +638,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
docs: MutableMapping[str, Documentation] = field(default_factory=dict)
|
||||
exposures: MutableMapping[str, Exposure] = field(default_factory=dict)
|
||||
metrics: MutableMapping[str, Metric] = field(default_factory=dict)
|
||||
entities: MutableMapping[str, Entity] = field(default_factory=dict)
|
||||
groups: MutableMapping[str, Group] = field(default_factory=dict)
|
||||
selectors: MutableMapping[str, Any] = field(default_factory=dict)
|
||||
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
|
||||
@@ -622,6 +661,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
_metric_lookup: Optional[MetricLookup] = field(
|
||||
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
|
||||
)
|
||||
_entity_lookup: Optional[EntityLookup] = field(
|
||||
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
|
||||
)
|
||||
_disabled_lookup: Optional[DisabledLookup] = field(
|
||||
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
|
||||
)
|
||||
@@ -654,6 +696,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
def update_metric(self, new_metric: Metric):
|
||||
_update_into(self.metrics, new_metric)
|
||||
|
||||
def update_entity(self, new_entity: Entity):
|
||||
_update_into(self.entities, new_entity)
|
||||
|
||||
def update_node(self, new_node: ManifestNode):
|
||||
_update_into(self.nodes, new_node)
|
||||
|
||||
@@ -670,6 +715,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
"exposures": {k: v.to_dict(omit_none=False) for k, v in self.exposures.items()},
|
||||
"groups": {k: v.to_dict(omit_none=False) for k, v in self.groups.items()},
|
||||
"metrics": {k: v.to_dict(omit_none=False) for k, v in self.metrics.items()},
|
||||
"entities": {k: v.to_dict(omit_none=False) for k, v in self.entities.items()},
|
||||
"nodes": {k: v.to_dict(omit_none=False) for k, v in self.nodes.items()},
|
||||
"sources": {k: v.to_dict(omit_none=False) for k, v in self.sources.items()},
|
||||
}
|
||||
@@ -732,6 +778,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
self.nodes.values(),
|
||||
self.sources.values(),
|
||||
self.metrics.values(),
|
||||
self.entities.values(),
|
||||
)
|
||||
for resource in all_resources:
|
||||
resource_type_plural = resource.resource_type.pluralize()
|
||||
@@ -760,6 +807,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
docs={k: _deepcopy(v) for k, v in self.docs.items()},
|
||||
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
|
||||
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
|
||||
entities={k: _deepcopy(v) for k, v in self.entities.items()},
|
||||
groups={k: _deepcopy(v) for k, v in self.groups.items()},
|
||||
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
|
||||
metadata=self.metadata,
|
||||
@@ -777,6 +825,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
self.sources.values(),
|
||||
self.exposures.values(),
|
||||
self.metrics.values(),
|
||||
self.entities.values(),
|
||||
)
|
||||
)
|
||||
forward_edges, backward_edges = build_node_edges(edge_members)
|
||||
@@ -816,6 +865,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
docs=self.docs,
|
||||
exposures=self.exposures,
|
||||
metrics=self.metrics,
|
||||
entities=self.entities,
|
||||
groups=self.groups,
|
||||
selectors=self.selectors,
|
||||
metadata=self.metadata,
|
||||
@@ -839,6 +889,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
return self.exposures[unique_id]
|
||||
elif unique_id in self.metrics:
|
||||
return self.metrics[unique_id]
|
||||
elif unique_id in self.entities:
|
||||
return self.entities[unique_id]
|
||||
else:
|
||||
# something terrible has happened
|
||||
raise dbt.exceptions.DbtInternalError(
|
||||
@@ -875,6 +927,12 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
|
||||
self._metric_lookup = MetricLookup(self)
|
||||
return self._metric_lookup
|
||||
|
||||
@property
|
||||
def entity_lookup(self) -> EntityLookup:
|
||||
if self._entity_lookup is None:
|
||||
self._entity_lookup = EntityLookup(self)
|
||||
return self._entity_lookup
|
||||
|
||||
def rebuild_ref_lookup(self):
|
||||
self._ref_lookup = RefableLookup(self)
|
||||
|
||||
@@ -975,6 +1033,31 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
                return Disabled(disabled[0])
        return None

    def resolve_entity(
        self,
        target_entity_name: str,
        target_entity_package: Optional[str],
        current_project: str,
        node_package: str,
    ) -> MaybeEntityNode:

        entity: Optional[Entity] = None
        disabled: Optional[List[Entity]] = None

        candidates = _search_packages(current_project, node_package, target_entity_package)
        for pkg in candidates:
            entity = self.entity_lookup.find(target_entity_name, pkg, self)

            if entity is not None and entity.config.enabled:
                return entity

            # it's possible that the node is disabled
            if disabled is None:
                disabled = self.disabled_lookup.find(f"{target_entity_name}", pkg)
        if disabled:
            return Disabled(disabled[0])
        return None

    # Called by DocsRuntimeContext.doc
    def resolve_doc(
        self,

@@ -1087,6 +1170,11 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
        self.metrics[metric.unique_id] = metric
        source_file.metrics.append(metric.unique_id)

    def add_entity(self, source_file: SchemaSourceFile, entity: Entity):
        _check_duplicates(entity, self.entities)
        self.entities[entity.unique_id] = entity
        source_file.entities.append(entity.unique_id)

    def add_group(self, source_file: SchemaSourceFile, group: Group):
        _check_duplicates(group, self.groups)
        self.groups[group.unique_id] = group

@@ -1107,6 +1195,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
            source_file.add_test(node.unique_id, test_from)
        if isinstance(node, Metric):
            source_file.metrics.append(node.unique_id)
        if isinstance(node, Entity):
            source_file.entities.append(node.unique_id)
        if isinstance(node, Exposure):
            source_file.exposures.append(node.unique_id)
        else:

@@ -1134,6 +1224,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
            self.docs,
            self.exposures,
            self.metrics,
            self.entities,
            self.groups,
            self.selectors,
            self.files,

@@ -1147,11 +1238,20 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
            self._source_lookup,
            self._ref_lookup,
            self._metric_lookup,
            self._entity_lookup,
            self._disabled_lookup,
            self._analysis_lookup,
        )
        return self.__class__, args

    @property
    def user_configured_model(self):
        user_configured_model = UserConfiguredModel(
            entities=[entity for entity in self.entities.values()],
            metrics=[metric for metric in self.metrics.values()],
        )
        return user_configured_model


class MacroManifest(MacroMethods):
    def __init__(self, macros):

@@ -1188,6 +1288,9 @@ class WritableManifest(ArtifactMixin):
    metrics: Mapping[UniqueID, Metric] = field(
        metadata=dict(description=("The metrics defined in the dbt project and its dependencies"))
    )
    entities: Mapping[UniqueID, Entity] = field(
        metadata=dict(description=("The entities defined in the dbt project and its dependencies"))
    )
    groups: Mapping[UniqueID, Group] = field(
        metadata=dict(description=("The groups defined in the dbt project"))
    )
core/dbt/contracts/graph/measures.py (new file, 60 lines)
@@ -0,0 +1,60 @@
from __future__ import annotations
from dbt.contracts.util import Replaceable, Mergeable
from dbt.dataclass_schema import dbtClassMixin
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from dbt.semantic.aggregation_properties import AggregationType
from dbt.semantic.references import TimeDimensionReference, MeasureReference


@dataclass
class MeasureAggregationParameters(dbtClassMixin, Replaceable):
    """Describes parameters for aggregations"""

    percentile: Optional[float] = None
    use_discrete_percentile: bool = False
    use_approximate_percentile: bool = False


@dataclass
class MeasureNonAdditiveDimensionParameters(dbtClassMixin, Replaceable):
    """Describes the params for specifying non-additive dimensions in a measure.
    NOTE: Currently, only TimeDimensions are supported for this filter
    """

    name: str
    window_choice: AggregationType = AggregationType.MIN
    window_groupings: List[str] = field(default_factory=list)


@dataclass
class Measure(dbtClassMixin, Mergeable):
    """Describes a measure"""

    name: str
    agg: AggregationType
    description: str = ""
    expr: Optional[str] = None
    create_metric: Optional[bool] = None
    agg_params: Optional[MeasureAggregationParameters] = None
    non_additive_dimension: Optional[MeasureNonAdditiveDimensionParameters] = None
    # Defines the time dimension to aggregate this measure by. If not specified, it means to use the primary time
    # dimension in the data source.
    agg_time_dimension: Optional[str] = None
    meta: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    config: Dict[str, Any] = field(default_factory=dict)

    @property
    def checked_agg_time_dimension(self) -> TimeDimensionReference:
        """Returns the aggregation time dimension, throwing an exception if it's not set."""
        assert self.agg_time_dimension, (
            f"Aggregation time dimension for measure {self.name} is not set! This should either be set directly on "
            f"the measure specification in the model, or else defaulted to the primary time dimension in the data "
            f"source containing the measure."
        )
        return TimeDimensionReference(name=self.agg_time_dimension)

    @property
    def reference(self) -> MeasureReference:  # noqa: D
        return MeasureReference(name=self.name)
@@ -1,4 +1,13 @@
from dbt.node_types import NodeType
from __future__ import annotations
import json
from dbt.contracts.util import Replaceable, Mergeable
from dbt.exceptions import ParsingError
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, List, Union
from dbt.semantic.references import MeasureReference
from dbt.semantic.time import TimeGranularity, string_to_time_granularity
from dbt.semantic.constraints import WhereClauseConstraint


class MetricReference(object):
@@ -10,84 +19,155 @@ class MetricReference(object):
        return f"{self.metric_name}"


class ResolvedMetricReference(MetricReference):
    """
    Simple proxy over a Metric which delegates property
    lookups to the underlying node. Also adds helper functions
    for working with metrics (ie. __str__ and templating functions)
class MetricType(StrEnum):
    """Currently supported metric types"""

    MEASURE_PROXY = "measure_proxy"
    RATIO = "ratio"
    EXPR = "expr"
    CUMULATIVE = "cumulative"
    DERIVED = "derived"


@dataclass
class UnparsedMetricInputMeasure(dbtClassMixin, Replaceable):
    """Provides a pointer to a measure along with metric-specific processing directives
    If an alias is set, this will be used as the string name reference for this measure after the aggregation
    phase in the SQL plan.
    """

    def __init__(self, node, manifest, Relation):
        super().__init__(node.name, node.package_name)
        self.node = node
        self.manifest = manifest
        self.Relation = Relation
    name: str
    constraint: Optional[str] = None
    alias: Optional[str] = None

    def __getattr__(self, key):
        return getattr(self.node, key)

    def __str__(self):
        return f"{self.node.name}"
@dataclass
class MetricInputMeasure(dbtClassMixin, Replaceable):
    """Provides a pointer to a measure along with metric-specific processing directives
    If an alias is set, this will be used as the string name reference for this measure after the aggregation
    phase in the SQL plan.
    """

    @classmethod
    def parent_metrics(cls, metric_node, manifest):
        yield metric_node
    name: str
    constraint: Optional[WhereClauseConstraint] = None
    alias: Optional[str] = None

        for parent_unique_id in metric_node.depends_on.nodes:
            node = manifest.metrics.get(parent_unique_id)
            if node and node.resource_type == NodeType.Metric:
                yield from cls.parent_metrics(node, manifest)
    # Removed _from_yaml_value due to how dbt reads in yml

    @classmethod
    def parent_metrics_names(cls, metric_node, manifest):
        yield metric_node.name
    @property
    def measure_reference(self) -> MeasureReference:
        """Property accessor to get the MeasureReference associated with this metric input measure"""
        return MeasureReference(name=self.name)

        for parent_unique_id in metric_node.depends_on.nodes:
            node = manifest.metrics.get(parent_unique_id)
            if node and node.resource_type == NodeType.Metric:
                yield from cls.parent_metrics_names(node, manifest)
    @property
    def post_aggregation_measure_reference(self) -> MeasureReference:
        """Property accessor to get the MeasureReference with the aliased name, if appropriate"""
        return MeasureReference(name=self.alias or self.name)

    @classmethod
    def reverse_dag_parsing(cls, metric_node, manifest, metric_depth_count):
        if metric_node.calculation_method == "derived":
            yield {metric_node.name: metric_depth_count}
            metric_depth_count = metric_depth_count + 1
    def __hash__(self) -> int:  # noqa: D
        return hash(json.dumps(self.to_dict()))

        for parent_unique_id in metric_node.depends_on.nodes:
            node = manifest.metrics.get(parent_unique_id)
            if (
                node
                and node.resource_type == NodeType.Metric
                and node.calculation_method == "derived"
            ):
                yield from cls.reverse_dag_parsing(node, manifest, metric_depth_count)

    def full_metric_dependency(self):
        to_return = list(set(self.parent_metrics_names(self.node, self.manifest)))
        return to_return
@dataclass
class MetricTimeWindow(dbtClassMixin, Mergeable):
    """Describes the window of time the metric should be accumulated over, e.g., '1 day', '2 weeks', etc"""

    def base_metric_dependency(self):
        in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))
    count: int
    granularity: TimeGranularity

        to_return = []
        for metric in in_scope_metrics:
            if metric.calculation_method != "derived" and metric.name not in to_return:
                to_return.append(metric.name)
    def to_string(self) -> str:  # noqa: D
        return f"{self.count} {self.granularity.value}"

        return to_return
    @staticmethod
    def parse(window: str) -> MetricTimeWindow:
        """Returns window values if parsing succeeds, None otherwise
        Output of the form: (<time unit count>, <time granularity>, <error message>) - error message is None if window is formatted properly
        """
        parts = window.split(" ")
        if len(parts) != 2:
            raise ParsingError(
                f"Invalid window ({window}) in cumulative metric. Should be of the form `<count> <granularity>`, e.g., `28 days`",
            )

    def derived_metric_dependency(self):
        in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))
        granularity = parts[1]
        # if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s')
        if granularity.endswith("s"):
            # months -> month
            granularity = granularity[:-1]
        if granularity not in [item.value for item in TimeGranularity]:
            raise ParsingError(
                f"Invalid time granularity {granularity} in cumulative metric window string: ({window})",
            )

        to_return = []
        for metric in in_scope_metrics:
            if metric.calculation_method == "derived" and metric.name not in to_return:
                to_return.append(metric.name)
        count = parts[0]
        if not count.isdigit():
            raise ParsingError(
                f"Invalid count ({count}) in cumulative metric window string: ({window})"
            )

        return to_return
        return MetricTimeWindow(
            count=int(count),
            granularity=string_to_time_granularity(granularity),
        )

    def derived_metric_dependency_depth(self):
        metric_depth_count = 1
        to_return = list(self.reverse_dag_parsing(self.node, self.manifest, metric_depth_count))

        return to_return
@dataclass
class UnparsedMetricInput(dbtClassMixin, Mergeable):
    """Provides a pointer to a metric along with the additional properties used on that metric."""

    name: str
    constraint: Optional[str] = None
    alias: Optional[str] = None
    offset_window: Optional[MetricTimeWindow] = None
    offset_to_grain: Optional[TimeGranularity] = None


@dataclass
class MetricInput(dbtClassMixin, Mergeable):
    """Provides a pointer to a metric along with the additional properties used on that metric."""

    name: str
    constraint: Optional[WhereClauseConstraint] = None
    alias: Optional[str] = None
    offset_window: Optional[MetricTimeWindow] = None
    offset_to_grain: Optional[TimeGranularity] = None


@dataclass
class UnparsedMetricTypeParams(dbtClassMixin, Mergeable):
    """Type params add additional context to certain metric types (the context depends on the metric type)"""

    # NOTE: Adding a union to allow for the class or a string. We
    # change to prefered class in schemas.py during conversion to Metric
    measure: Optional[Union[UnparsedMetricInputMeasure, str]] = None
    measures: List[Union[UnparsedMetricInputMeasure, str]] = field(default_factory=list)
    numerator: Optional[Union[UnparsedMetricInputMeasure, str]] = None
    denominator: Optional[Union[UnparsedMetricInputMeasure, str]] = None
    expr: Optional[str] = None
    window: Optional[Union[MetricTimeWindow, str]] = None
    grain_to_date: Optional[TimeGranularity] = None
    metrics: List[Union[UnparsedMetricInput, str]] = field(default_factory=list)


@dataclass
class MetricTypeParams(dbtClassMixin):
    """Type params add additional context to certain metric types (the context depends on the metric type)"""

    measure: Optional[MetricInputMeasure] = None
    measures: List[MetricInputMeasure] = field(default_factory=list)
    numerator: Optional[MetricInputMeasure] = None
    denominator: Optional[MetricInputMeasure] = None
    expr: Optional[str] = None
    window: Optional[MetricTimeWindow] = None
    grain_to_date: Optional[TimeGranularity] = None
    metrics: List[MetricInput] = field(default_factory=list)

    @property
    def numerator_measure_reference(self) -> Optional[MeasureReference]:
        """Return the measure reference, if any, associated with the metric input measure defined as the numerator"""
        return self.numerator.measure_reference if self.numerator else None

    @property
    def denominator_measure_reference(self) -> Optional[MeasureReference]:
        """Return the measure reference, if any, associated with the metric input measure defined as the denominator"""
        return self.denominator.measure_reference if self.denominator else None
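The `MetricTimeWindow.parse` logic above turns a string like "28 days" into a count plus a granularity. A standalone sketch of that parsing, using a plain enum in place of dbt's `TimeGranularity` (the names here are illustrative, not the real API):

```python
from dataclasses import dataclass
from enum import Enum


class Granularity(Enum):
    DAY = "day"
    WEEK = "week"
    MONTH = "month"
    YEAR = "year"


@dataclass
class TimeWindow:
    count: int
    granularity: Granularity

    @staticmethod
    def parse(window: str) -> "TimeWindow":
        parts = window.split(" ")
        if len(parts) != 2:
            raise ValueError(f"Invalid window ({window}); expected `<count> <granularity>`, e.g. `28 days`")
        count, granularity = parts
        if granularity.endswith("s"):  # "days" -> "day"
            granularity = granularity[:-1]
        if granularity not in [g.value for g in Granularity]:
            raise ValueError(f"Invalid time granularity {granularity} in window string ({window})")
        if not count.isdigit():
            raise ValueError(f"Invalid count ({count}) in window string ({window})")
        return TimeWindow(count=int(count), granularity=Granularity(granularity))


assert TimeWindow.parse("28 days") == TimeWindow(count=28, granularity=Granularity.DAY)
```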
@@ -369,6 +369,11 @@ class MetricConfig(BaseConfig):
    group: Optional[str] = None


@dataclass
class EntityConfig(BaseConfig):
    enabled: bool = True


@dataclass
class ExposureConfig(BaseConfig):
    enabled: bool = True

@@ -610,6 +615,7 @@ class SnapshotConfig(EmptySnapshotConfig):

RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
    NodeType.Metric: MetricConfig,
    NodeType.Entity: EntityConfig,
    NodeType.Exposure: ExposureConfig,
    NodeType.Source: SourceConfig,
    NodeType.Seed: SeedConfig,
@@ -13,6 +13,14 @@ from typing import (
    Iterator,
)

from dbt.semantic.references import (
    MeasureReference,
    LinkableElementReference,
    EntityReference,
)
from dbt.semantic.object_utils import hash_items
from dbt.semantic.constraints import WhereClauseConstraint

from dbt.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin

from dbt.clients.system import write_file

@@ -31,8 +39,16 @@ from dbt.contracts.graph.unparsed import (
    Owner,
    ExposureType,
    MaturityType,
    MetricFilter,
    MetricTime,
)
from dbt.contracts.graph.identifiers import Identifier
from dbt.contracts.graph.dimensions import Dimension
from dbt.contracts.graph.measures import Measure
from dbt.contracts.graph.entities import EntityOrigin
from dbt.contracts.graph.metrics import (
    MetricType,
    MetricInputMeasure,
    MetricTypeParams,
    MetricInput,
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.proto_types import NodeInfo

@@ -57,6 +73,7 @@ from .model_config import (
    TestConfig,
    SourceConfig,
    MetricConfig,
    EntityConfig,
    ExposureConfig,
    EmptySnapshotConfig,
    SnapshotConfig,
@@ -278,7 +295,7 @@ class ParsedNode(NodeInfoMixin, ParsedNodeMandatory, SerializableType):
    @classmethod
    def _deserialize(cls, dct: Dict[str, int]):
        # The serialized ParsedNodes do not differ from each other
        # in fields that would allow 'from_dict' to distinguis
        # in fields that would allow 'from_dict' to distinguish
        # between them.
        resource_type = dct["resource_type"]
        if resource_type == "model":

@@ -418,6 +435,7 @@ class CompiledNode(ParsedNode):
    refs: List[List[str]] = field(default_factory=list)
    sources: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    entities: List[List[str]] = field(default_factory=list)
    depends_on: DependsOn = field(default_factory=DependsOn)
    compiled_path: Optional[str] = None
    compiled: bool = False

@@ -971,6 +989,7 @@ class Exposure(GraphNode):
    refs: List[List[str]] = field(default_factory=list)
    sources: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    entities: List[List[str]] = field(default_factory=list)
    created_at: float = field(default_factory=lambda: time.time())

    @property
@@ -1033,27 +1052,15 @@
# ====================================


@dataclass
class MetricReference(dbtClassMixin, Replaceable):
    sql: Optional[Union[str, int]]
    unique_id: Optional[str]


@dataclass
class Metric(GraphNode):
    name: str
    description: str
    label: str
    calculation_method: str
    expression: str
    filters: List[MetricFilter]
    time_grains: List[str]
    dimensions: List[str]
    type: MetricType
    type_params: MetricTypeParams
    entity: Optional[str] = None
    constraint: Optional[WhereClauseConstraint] = None
    resource_type: NodeType = field(metadata={"restrict": [NodeType.Metric]})
    timestamp: Optional[str] = None
    window: Optional[MetricTime] = None
    model: Optional[str] = None
    model_unique_id: Optional[str] = None
    meta: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    config: MetricConfig = field(default_factory=MetricConfig)

@@ -1062,8 +1069,43 @@ class Metric(GraphNode):
    depends_on: DependsOn = field(default_factory=DependsOn)
    refs: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    entities: List[List[str]] = field(default_factory=list)
    created_at: float = field(default_factory=lambda: time.time())

    @property
    def input_measures(self) -> List[MetricInputMeasure]:
        """Return the complete list of input measure configurations for this metric"""
        tp = self.type_params
        res = tp.measures or []
        if tp.measure:
            res.append(tp.measure)
        if tp.numerator:
            res.append(tp.numerator)
        if tp.denominator:
            res.append(tp.denominator)

        return res

    @property
    def measure_references(self) -> List[MeasureReference]:
        """Return the measure references associated with all input measure configurations for this metric"""
        return [x.measure_reference for x in self.input_measures]

    @property
    def input_metrics(self) -> List[MetricInput]:
        """Return the associated input metrics for this metric"""
        return self.type_params.metrics or []

    @property
    def definition_hash(self) -> str:  # noqa: D
        values: List[str] = [self.name, self.type_params.expr or ""]
        if self.constraint:
            values.append(self.constraint.where_clause)
            if self.constraint.linkable_names:
                values.extend(self.constraint.linkable_names)
        values.extend([m.name for m in self.measure_references])
        return hash_items(values)

    @property
    def depends_on_nodes(self):
        return self.depends_on.nodes
@@ -1072,35 +1114,20 @@ class Metric(GraphNode):
|
||||
def search_name(self):
|
||||
return self.name
|
||||
|
||||
def same_model(self, old: "Metric") -> bool:
|
||||
return self.model == old.model
|
||||
|
||||
def same_window(self, old: "Metric") -> bool:
|
||||
return self.window == old.window
|
||||
|
||||
def same_dimensions(self, old: "Metric") -> bool:
|
||||
return self.dimensions == old.dimensions
|
||||
|
||||
def same_filters(self, old: "Metric") -> bool:
|
||||
return self.filters == old.filters
|
||||
def same_entity(self, old: "Metric") -> bool:
|
||||
return self.entity == old.entity
|
||||
|
||||
def same_description(self, old: "Metric") -> bool:
|
||||
return self.description == old.description
|
||||
|
||||
def same_label(self, old: "Metric") -> bool:
|
||||
return self.label == old.label
|
||||
def same_type(self, old: "Metric") -> bool:
|
||||
return self.type == old.type
|
||||
|
||||
def same_calculation_method(self, old: "Metric") -> bool:
|
||||
return self.calculation_method == old.calculation_method
|
||||
def same_type_params(self, old: "Metric") -> bool:
|
||||
return self.type_params == old.type_params
|
||||
|
||||
def same_expression(self, old: "Metric") -> bool:
|
||||
return self.expression == old.expression
|
||||
|
||||
def same_timestamp(self, old: "Metric") -> bool:
|
||||
return self.timestamp == old.timestamp
|
||||
|
||||
def same_time_grains(self, old: "Metric") -> bool:
|
||||
return self.time_grains == old.time_grains
|
||||
def same_constraint(self, old: "Metric") -> bool:
|
||||
return self.constraint == old.constraint
|
||||
|
||||
def same_config(self, old: "Metric") -> bool:
|
||||
return self.config.same_contents(
|
||||
@@ -1115,21 +1142,164 @@ class Metric(GraphNode):
|
||||
return True
|
||||
|
||||
return (
|
||||
self.same_model(old)
|
||||
and self.same_window(old)
|
||||
and self.same_dimensions(old)
|
||||
and self.same_filters(old)
|
||||
and self.same_description(old)
|
||||
and self.same_label(old)
|
||||
and self.same_calculation_method(old)
|
||||
and self.same_expression(old)
|
||||
and self.same_timestamp(old)
|
||||
and self.same_time_grains(old)
|
||||
self.same_description(old)
|
||||
and self.same_entity(old)
|
||||
and self.same_constraint(old)
|
||||
and self.same_type(old)
|
||||
and self.same_type_params(old)
|
||||
and self.same_config(old)
|
||||
and True
|
||||
)
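As a side note on the Metric class above: input_measures simply folds the optional single measure, numerator, and denominator into the measures list. A minimal sketch of that behaviour, using a stand-in class rather than the real MetricTypeParams (field names mirror the property, everything else is illustrative):
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class TypeParamsSketch:
    # stand-in for MetricTypeParams; only the fields read by input_measures
    measures: List[str] = field(default_factory=list)
    measure: Optional[str] = None
    numerator: Optional[str] = None
    denominator: Optional[str] = None

def input_measures(tp: TypeParamsSketch) -> List[str]:
    res = tp.measures or []
    for extra in (tp.measure, tp.numerator, tp.denominator):
        if extra:
            res.append(extra)
    return res

# a ratio-style metric contributes its numerator and denominator as input measures
assert input_measures(TypeParamsSketch(numerator="orders", denominator="visits")) == ["orders", "visits"]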
|
||||
|
||||
|
||||
@dataclass
|
||||
class Entity(GraphNode):
|
||||
name: str
|
||||
model: str
|
||||
description: str
|
||||
origin: EntityOrigin
|
||||
sql_table: Optional[str] = None
|
||||
identifiers: Sequence[Identifier] = field(default_factory=list)
|
||||
dimensions: Sequence[Dimension] = field(default_factory=list)
|
||||
measures: Sequence[Measure] = field(default_factory=list)
|
||||
resource_type: NodeType = field(metadata={"restrict": [NodeType.Entity]})
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
config: EntityConfig = field(default_factory=EntityConfig)
|
||||
unrendered_config: Dict[str, Any] = field(default_factory=dict)
|
||||
sources: List[List[str]] = field(default_factory=list)
|
||||
depends_on: DependsOn = field(default_factory=DependsOn)
|
||||
refs: List[List[str]] = field(default_factory=list)
|
||||
entities: List[List[str]] = field(default_factory=list)
|
||||
metrics: List[List[str]] = field(default_factory=list)
|
||||
created_at: float = field(default_factory=lambda: time.time())
|
||||
|
||||
@property
|
||||
def depends_on_nodes(self):
|
||||
return self.depends_on.nodes
|
||||
|
||||
@property
|
||||
def search_name(self):
|
||||
return self.name
|
||||
|
||||
def same_model(self, old: "Entity") -> bool:
|
||||
return self.model == old.model
|
||||
|
||||
def same_identifiers(self, old: "Entity") -> bool:
|
||||
return self.identifiers == old.identifiers
|
||||
|
||||
def same_dimensions(self, old: "Entity") -> bool:
|
||||
return self.dimensions == old.dimensions
|
||||
|
||||
def same_measures(self, old: "Entity") -> bool:
|
||||
return self.measures == old.measures
|
||||
|
||||
def same_description(self, old: "Entity") -> bool:
|
||||
return self.description == old.description
|
||||
|
||||
def same_origin(self, old: "Entity") -> bool:
|
||||
return self.origin == old.origin
|
||||
|
||||
def same_config(self, old: "Entity") -> bool:
|
||||
return self.config.same_contents(
|
||||
self.unrendered_config,
|
||||
old.unrendered_config,
|
||||
)
|
||||
|
||||
def same_contents(self, old: Optional["Entity"]) -> bool:
|
||||
# existing when it didn't before is a change!
|
||||
# metadata/tags changes are not "changes"
|
||||
if old is None:
|
||||
return True
|
||||
|
||||
return (
|
||||
self.same_model(old)
|
||||
and self.same_identifiers(old)
|
||||
and self.same_dimensions(old)
|
||||
and self.same_measures(old)
|
||||
and self.same_description(old)
|
||||
and self.same_origin(old)
|
||||
and self.same_config(old)
|
||||
and True
|
||||
)
|
||||
|
||||
@property
|
||||
def identifier_references(self) -> List[LinkableElementReference]: # noqa: D
|
||||
return [i.reference for i in self.identifiers]
|
||||
|
||||
@property
|
||||
def dimension_references(self) -> List[LinkableElementReference]: # noqa: D
|
||||
return [i.reference for i in self.dimensions]
|
||||
|
||||
@property
|
||||
def measure_references(self) -> List[MeasureReference]: # noqa: D
|
||||
return [i.reference for i in self.measures]
|
||||
|
||||
def get_measure(self, measure_reference: MeasureReference) -> Measure: # noqa: D
|
||||
for measure in self.measures:
|
||||
if measure.reference == measure_reference:
|
||||
return measure
|
||||
|
||||
raise ValueError(
|
||||
f"No dimension with name ({measure_reference.name}) in data source with name ({self.name})"
|
||||
)
|
||||
|
||||
def get_dimension(self, dimension_reference: LinkableElementReference) -> Dimension: # noqa: D
|
||||
for dim in self.dimensions:
|
||||
if dim.reference == dimension_reference:
|
||||
return dim
|
||||
|
||||
raise ValueError(
|
||||
f"No dimension with name ({dimension_reference}) in data source with name ({self.name})"
|
||||
)
|
||||
|
||||
def get_identifier(
|
||||
self, identifier_reference: LinkableElementReference
|
||||
) -> Identifier: # noqa: D
|
||||
for ident in self.identifiers:
|
||||
if ident.reference == identifier_reference:
|
||||
return ident
|
||||
|
||||
raise ValueError(
|
||||
f"No identifier with name ({identifier_reference}) in data source with name ({self.name})"
|
||||
)
|
||||
|
||||
@property
|
||||
def has_validity_dimensions(self) -> bool:
|
||||
"""Returns True if there are validity params set on one or more dimensions"""
|
||||
return any([dim.validity_params is not None for dim in self.dimensions])
|
||||
|
||||
@property
|
||||
def validity_start_dimension(self) -> Optional[Dimension]:
|
||||
"""Returns the validity window start dimension, if one is set"""
|
||||
validity_start_dims = [
|
||||
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start
|
||||
]
|
||||
if not validity_start_dims:
|
||||
return None
|
||||
assert (
|
||||
len(validity_start_dims) == 1
|
||||
), "Found more than one validity start dimension. This should have been blocked in validation!"
|
||||
return validity_start_dims[0]
|
||||
|
||||
@property
|
||||
def validity_end_dimension(self) -> Optional[Dimension]:
|
||||
"""Returns the validity window end dimension, if one is set"""
|
||||
validity_end_dims = [
|
||||
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end
|
||||
]
|
||||
if not validity_end_dims:
|
||||
return None
|
||||
assert (
|
||||
len(validity_end_dims) == 1
|
||||
), "Found more than one validity end dimension. This should have been blocked in validation!"
|
||||
return validity_end_dims[0]
|
||||
|
||||
@property
|
||||
def reference(self) -> EntityReference: # noqa: D
|
||||
return EntityReference(entity_name=self.name)
|
||||
|
||||
|
||||
# ====================================
|
||||
# Group node
|
||||
# ====================================
|
||||
@@ -1204,6 +1374,7 @@ GraphMemberNode = Union[
|
||||
ResultNode,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
]
|
||||
|
||||
# All "nodes" (or node-like objects) in this file
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from __future__ import annotations
|
||||
import re
|
||||
|
||||
from dbt import deprecations
|
||||
@@ -6,7 +7,6 @@ from dbt.contracts.util import (
|
||||
AdditionalPropertiesMixin,
|
||||
Mergeable,
|
||||
Replaceable,
|
||||
rename_metric_attr,
|
||||
)
|
||||
|
||||
# trigger the PathEncoder
|
||||
@@ -15,6 +15,17 @@ from dbt.exceptions import CompilationError, ParsingError
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, ValidationError
|
||||
|
||||
# Semantic Classes
|
||||
from dbt.contracts.graph.dimensions import Dimension
|
||||
from dbt.contracts.graph.identifiers import Identifier
|
||||
from dbt.contracts.graph.measures import Measure
|
||||
from dbt.contracts.graph.metrics import (
|
||||
MetricType,
|
||||
UnparsedMetricTypeParams,
|
||||
)
|
||||
from dbt.semantic.constraints import WhereClauseConstraint
|
||||
from dbt.contracts.graph.entities import EntityMutability, EntityMutabilityType, EntityOrigin
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
@@ -458,87 +469,107 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
|
||||
raise ValidationError("Exposure owner must have at least one of 'name' or 'email'.")
|
||||
|
||||
|
||||
@dataclass
|
||||
class MetricFilter(dbtClassMixin, Replaceable):
|
||||
field: str
|
||||
operator: str
|
||||
# TODO : Can we make this Any?
|
||||
value: str
|
||||
|
||||
|
||||
class MetricTimePeriod(StrEnum):
|
||||
day = "day"
|
||||
week = "week"
|
||||
month = "month"
|
||||
year = "year"
|
||||
|
||||
def plural(self) -> str:
|
||||
return str(self) + "s"
|
||||
#########################
|
||||
# SEMANTIC LAYER CLASSES
|
||||
#########################
|
||||
|
||||
|
||||
@dataclass
|
||||
class MetricTime(dbtClassMixin, Mergeable):
|
||||
count: Optional[int] = None
|
||||
period: Optional[MetricTimePeriod] = None
|
||||
class UnparsedEntity(dbtClassMixin, Replaceable):
|
||||
"""This class is used for entity information"""
|
||||
|
||||
def __bool__(self):
|
||||
return self.count is not None and self.period is not None
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedMetric(dbtClassMixin, Replaceable):
|
||||
name: str
|
||||
label: str
|
||||
calculation_method: str
|
||||
expression: str
|
||||
model: str
|
||||
description: str = ""
|
||||
timestamp: Optional[str] = None
|
||||
time_grains: List[str] = field(default_factory=list)
|
||||
dimensions: List[str] = field(default_factory=list)
|
||||
window: Optional[MetricTime] = None
|
||||
model: Optional[str] = None
|
||||
filters: List[MetricFilter] = field(default_factory=list)
|
||||
identifiers: Sequence[Identifier] = field(default_factory=list)
|
||||
dimensions: Sequence[Dimension] = field(default_factory=list)
|
||||
measures: Sequence[Measure] = field(default_factory=list)
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
config: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# TODO: Figure out if we need this
|
||||
mutability: EntityMutability = EntityMutability(type=EntityMutabilityType.FULL_MUTATION)
|
||||
|
||||
# TODO: Figure out if we need this
|
||||
origin: EntityOrigin = EntityOrigin.SOURCE
|
||||
|
||||
@classmethod
|
||||
def validate(cls, data):
|
||||
super(UnparsedEntity, cls).validate(data)
|
||||
# TODO: Replace this hacky way to verify a ref statement
|
||||
# We are using this today in order to verify that model field
|
||||
# is taking a ref input
|
||||
if "ref('" not in data["model"]:
|
||||
raise ParsingError(
|
||||
f"The entity '{data['name']}' does not contain a proper ref('') in the model property."
|
||||
)
|
||||
for identifier in data["identifiers"]:
|
||||
if identifier.get("entity") is None:
|
||||
if "name" not in identifier:
|
||||
raise ParsingError(
|
||||
f"Failed to define identifier entity value for entity '{data['name']}' because identifier name was not defined."
|
||||
)
|
||||
identifier["entity"] = identifier["name"]
|
||||
|
||||
|
||||
@dataclass
|
||||
class UnparsedMetric(dbtClassMixin):
|
||||
"""Describes a metric"""
|
||||
|
||||
name: str
|
||||
type: MetricType
|
||||
type_params: UnparsedMetricTypeParams
|
||||
description: str = ""
|
||||
entity: Optional[str] = None
|
||||
constraint: Optional[str] = None
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
tags: List[str] = field(default_factory=list)
|
||||
config: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@classmethod
|
||||
def validate(cls, data):
|
||||
data = rename_metric_attr(data, raise_deprecation_warning=True)
|
||||
super(UnparsedMetric, cls).validate(data)
|
||||
if "name" in data:
|
||||
errors = []
|
||||
if " " in data["name"]:
|
||||
errors.append("cannot contain spaces")
|
||||
# This handles failing queries due to too long metric names.
|
||||
# It only occurs in BigQuery and Snowflake (Postgres/Redshift truncate)
|
||||
if len(data["name"]) > 250:
|
||||
errors.append("cannot contain more than 250 characters")
|
||||
if not (re.match(r"^[A-Za-z]", data["name"])):
|
||||
errors.append("must begin with a letter")
|
||||
if not (re.match(r"[\w-]+$", data["name"])):
|
||||
errors.append("must contain only letters, numbers and underscores")
|
||||
|
||||
if errors:
|
||||
raise ParsingError(
|
||||
f"The metric name '{data['name']}' is invalid. It {', '.join(e for e in errors)}"
|
||||
# The following validation is because CM couldn't figure out a better way
|
||||
# to parse constraint strings into WhereClauseConstraints without throwing
|
||||
# errors all over the place
|
||||
if "constraint" in data:
|
||||
if isinstance(data["constraint"], str):
|
||||
data["constraint"] = WhereClauseConstraint.parse(data["constraint"])
|
||||
else:
|
||||
raise CompilationError(
|
||||
f"Expected input for constraint on metric {data['name']} to be of type string"
|
||||
)
|
||||
|
||||
if data.get("timestamp") is None and data.get("time_grains") is not None:
|
||||
raise ValidationError(
|
||||
f"The metric '{data['name']} has time_grains defined but is missing a timestamp dimension."
|
||||
)
|
||||
if "type_params" in data:
|
||||
if "metrics" in data["type_params"]:
|
||||
for loop_id, metric in enumerate(data["type_params"]["metrics"]):
|
||||
if isinstance(metric, dict):
|
||||
if isinstance(metric["constraint"], str):
|
||||
data["type_params"]["metrics"][loop_id][
|
||||
"constraint"
|
||||
] = WhereClauseConstraint.parse(metric["constraint"])
|
||||
|
||||
if data.get("timestamp") is None and data.get("window") is not None:
|
||||
raise ValidationError(
|
||||
f"The metric '{data['name']} has a window defined but is missing a timestamp dimension."
|
||||
)
|
||||
|
||||
if data.get("model") is None and data.get("calculation_method") != "derived":
|
||||
raise ValidationError("Non-derived metrics require a 'model' property")
|
||||
|
||||
if data.get("model") is not None and data.get("calculation_method") == "derived":
|
||||
raise ValidationError("Derived metrics cannot have a 'model' property")
|
||||
# TODO: Figure out better way to convert to input measures. We need this here
|
||||
# so we can do full "mf model" validation in schemas.py. Specifically for input
|
||||
# measure metric rules - they require that identifiers be present in the metrics property
|
||||
if "entity" not in data:
|
||||
if data["type"] != MetricType.DERIVED:
|
||||
raise CompilationError(
|
||||
f"The metric {data['name']} is missing the required entity property."
|
||||
)
|
||||
elif "entity" in data:
|
||||
if data["type"] == MetricType.DERIVED:
|
||||
raise CompilationError(
|
||||
f"The metric {data['name']} is derived, which does not support entity definition."
|
||||
)
|
||||
# TODO: Replace this hacky way to verify an entity lookup
|
||||
# We are doing this to ensure that the entity property is using an entity
|
||||
# function and not just providing a string
|
||||
if "entity('" not in data["entity"]:
|
||||
raise ParsingError(
|
||||
f"The metric '{data['name']}' does not contain a proper entity('') reference in the entity property."
|
||||
)
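Pulling the name rules out of the validate method above: no spaces, at most 250 characters, must start with a letter, and only word characters after that. A standalone sketch of the same checks (the helper name is made up):
import re

def _name_errors(name: str) -> list:
    errors = []
    if " " in name:
        errors.append("cannot contain spaces")
    if len(name) > 250:
        errors.append("cannot contain more than 250 characters")
    if not re.match(r"^[A-Za-z]", name):
        errors.append("must begin with a letter")
    if not re.match(r"[\w-]+$", name):
        errors.append("must contain only letters, numbers and underscores")
    return errors

assert _name_errors("total_revenue") == []
assert "cannot contain spaces" in _name_errors("total revenue")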
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -214,6 +214,7 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
|
||||
sources: Dict[str, Any] = field(default_factory=dict)
|
||||
tests: Dict[str, Any] = field(default_factory=dict)
|
||||
metrics: Dict[str, Any] = field(default_factory=dict)
|
||||
entities: Dict[str, Any] = field(default_factory=dict)
|
||||
exposures: Dict[str, Any] = field(default_factory=dict)
|
||||
vars: Optional[Dict[str, Any]] = field(
|
||||
default=None,
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
from typing import (
|
||||
Type,
|
||||
ClassVar,
|
||||
cast,
|
||||
)
|
||||
from typing import Type, ClassVar, cast, List
|
||||
import re
|
||||
from dataclasses import fields
|
||||
from enum import Enum
|
||||
@@ -129,6 +125,11 @@ class StrEnum(str, SerializableType, Enum):
|
||||
def _deserialize(cls, value: str):
|
||||
return cls(value)
|
||||
|
||||
@classmethod
|
||||
def list_names(cls) -> List[str]:
|
||||
"""List valid names within this enum class"""
|
||||
return list(cls.__members__.keys())
|
||||
|
||||
|
||||
class HyphenatedDbtClassMixin(dbtClassMixin):
|
||||
# used by from_dict/to_dict
|
||||
|
||||
@@ -218,6 +218,11 @@ class DbtValidationError(DbtRuntimeError):
|
||||
MESSAGE = "Validation Error"
|
||||
|
||||
|
||||
class DbtSemanticValidationError(DbtRuntimeError):
|
||||
CODE = 10020
|
||||
MESSAGE = "Semantic Validation Error"
|
||||
|
||||
|
||||
class ParsingError(DbtRuntimeError):
|
||||
CODE = 10015
|
||||
MESSAGE = "Parsing Error"
|
||||
@@ -872,6 +877,17 @@ class MetricArgsError(CompilationError):
|
||||
return msg
|
||||
|
||||
|
||||
class EntityArgsError(CompilationError):
|
||||
def __init__(self, node, args):
|
||||
self.node = node
|
||||
self.args = args
|
||||
super().__init__(msg=self.get_message())
|
||||
|
||||
def get_message(self) -> str:
|
||||
msg = f"entity() takes at most two arguments ({len(self.args)} given)"
|
||||
return msg
|
||||
|
||||
|
||||
class RefBadContextError(CompilationError):
|
||||
def __init__(self, node, args):
|
||||
self.node = node
|
||||
@@ -1252,6 +1268,7 @@ class EnvVarMissingError(ParsingError):
|
||||
|
||||
|
||||
class TargetNotFoundError(CompilationError):
|
||||
# NOTE: CM Might be what I'm looking for
|
||||
def __init__(
|
||||
self,
|
||||
node,
|
||||
|
||||
@@ -21,7 +21,7 @@ from .selector_spec import (
|
||||
|
||||
INTERSECTION_DELIMITER = ","
|
||||
|
||||
DEFAULT_INCLUDES: List[str] = ["fqn:*", "source:*", "exposure:*", "metric:*"]
|
||||
DEFAULT_INCLUDES: List[str] = ["fqn:*", "source:*", "exposure:*", "metric:*", "entity:*"]
|
||||
DEFAULT_EXCLUDES: List[str] = []
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from dbt.contracts.graph.nodes import (
|
||||
SourceDefinition,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
GraphMemberNode,
|
||||
)
|
||||
from dbt.contracts.graph.manifest import Manifest
|
||||
@@ -51,8 +52,8 @@ class GraphQueue:
|
||||
node = self.manifest.expect(node_id)
|
||||
if node.resource_type != NodeType.Model:
|
||||
return False
|
||||
# must be a Model - tell mypy this won't be a Source or Exposure or Metric
|
||||
assert not isinstance(node, (SourceDefinition, Exposure, Metric))
|
||||
# must be a Model - tell mypy this won't be a Source or Exposure or Metric or Entity
|
||||
assert not isinstance(node, (SourceDefinition, Exposure, Metric, Entity))
|
||||
if node.is_ephemeral:
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -163,6 +163,9 @@ class NodeSelector(MethodManager):
|
||||
elif unique_id in self.manifest.metrics:
|
||||
metric = self.manifest.metrics[unique_id]
|
||||
return metric.config.enabled
|
||||
elif unique_id in self.manifest.entities:
|
||||
entity = self.manifest.entities[unique_id]
|
||||
return entity.config.enabled
|
||||
node = self.manifest.nodes[unique_id]
|
||||
return not node.empty and node.config.enabled
|
||||
|
||||
@@ -182,6 +185,8 @@ class NodeSelector(MethodManager):
|
||||
node = self.manifest.exposures[unique_id]
|
||||
elif unique_id in self.manifest.metrics:
|
||||
node = self.manifest.metrics[unique_id]
|
||||
elif unique_id in self.manifest.entities:
|
||||
node = self.manifest.entities[unique_id]
|
||||
else:
|
||||
raise DbtInternalError(f"Node {unique_id} not found in the manifest!")
|
||||
return self.node_is_match(node)
|
||||
|
||||
@@ -12,6 +12,7 @@ from dbt.contracts.graph.nodes import (
|
||||
SingularTestNode,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
GenericTestNode,
|
||||
SourceDefinition,
|
||||
ResultNode,
|
||||
@@ -43,6 +44,7 @@ class MethodName(StrEnum):
|
||||
State = "state"
|
||||
Exposure = "exposure"
|
||||
Metric = "metric"
|
||||
Entity = "entity"
|
||||
Result = "result"
|
||||
SourceStatus = "source_status"
|
||||
|
||||
@@ -71,7 +73,7 @@ def is_selected_node(fqn: List[str], node_selector: str):
|
||||
return True
|
||||
|
||||
|
||||
SelectorTarget = Union[SourceDefinition, ManifestNode, Exposure, Metric]
|
||||
SelectorTarget = Union[SourceDefinition, ManifestNode, Exposure, Metric, Entity]
|
||||
|
||||
|
||||
class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
@@ -118,6 +120,14 @@ class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
continue
|
||||
yield unique_id, metric
|
||||
|
||||
def entity_nodes(self, included_nodes: Set[UniqueId]) -> Iterator[Tuple[UniqueId, Entity]]:
|
||||
|
||||
for key, metric in self.manifest.entities.items():
|
||||
unique_id = UniqueId(key)
|
||||
if unique_id not in included_nodes:
|
||||
continue
|
||||
yield unique_id, metric
|
||||
|
||||
def all_nodes(
|
||||
self, included_nodes: Set[UniqueId]
|
||||
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
|
||||
@@ -126,6 +136,7 @@ class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
self.source_nodes(included_nodes),
|
||||
self.exposure_nodes(included_nodes),
|
||||
self.metric_nodes(included_nodes),
|
||||
self.entity_nodes(included_nodes),
|
||||
)
|
||||
|
||||
def configurable_nodes(
|
||||
@@ -136,11 +147,12 @@ class SelectorMethod(metaclass=abc.ABCMeta):
|
||||
def non_source_nodes(
|
||||
self,
|
||||
included_nodes: Set[UniqueId],
|
||||
) -> Iterator[Tuple[UniqueId, Union[Exposure, ManifestNode, Metric]]]:
|
||||
) -> Iterator[Tuple[UniqueId, Union[Exposure, ManifestNode, Metric, Entity]]]:
|
||||
yield from chain(
|
||||
self.parsed_nodes(included_nodes),
|
||||
self.exposure_nodes(included_nodes),
|
||||
self.metric_nodes(included_nodes),
|
||||
self.entity_nodes(included_nodes),
|
||||
)
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -270,6 +282,33 @@ class MetricSelectorMethod(SelectorMethod):
|
||||
yield node
|
||||
|
||||
|
||||
class EntitySelectorMethod(SelectorMethod):
|
||||
"""TODO: Add a description of what this selector method is doing"""
|
||||
|
||||
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:
|
||||
parts = selector.split(".")
|
||||
target_package = SELECTOR_GLOB
|
||||
if len(parts) == 1:
|
||||
target_name = parts[0]
|
||||
elif len(parts) == 2:
|
||||
target_package, target_name = parts
|
||||
else:
|
||||
msg = (
|
||||
'Invalid entity selector value "{}". Entities must be of '
|
||||
"the form ${{entity_name}} or "
|
||||
"${{entity_package.entity_name}}"
|
||||
).format(selector)
|
||||
raise DbtRuntimeError(msg)
|
||||
|
||||
for node, real_node in self.entity_nodes(included_nodes):
|
||||
if target_package not in (real_node.package_name, SELECTOR_GLOB):
|
||||
continue
|
||||
if target_name not in (real_node.name, SELECTOR_GLOB):
|
||||
continue
|
||||
|
||||
yield node
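With "entity:*" added to DEFAULT_INCLUDES, the selector string handled above is either a bare entity name or package_name.entity_name. A small sketch of that parsing step in isolation (the function name and the "*" glob stand-in are illustrative):
def split_entity_selector(selector: str):
    parts = selector.split(".")
    if len(parts) == 1:
        return "*", parts[0]        # any package; SELECTOR_GLOB stands in as "*"
    if len(parts) == 2:
        return parts[0], parts[1]   # package-qualified
    raise ValueError(f"Invalid entity selector value {selector!r}")

assert split_entity_selector("customers") == ("*", "customers")
assert split_entity_selector("my_package.customers") == ("my_package", "customers")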
|
||||
|
||||
|
||||
class PathSelectorMethod(SelectorMethod):
|
||||
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:
|
||||
"""Yields nodes from included that match the given path."""
|
||||
@@ -530,6 +569,8 @@ class StateSelectorMethod(SelectorMethod):
|
||||
previous_node = manifest.exposures[node]
|
||||
elif node in manifest.metrics:
|
||||
previous_node = manifest.metrics[node]
|
||||
elif node in manifest.entities:
|
||||
previous_node = manifest.entities[node]
|
||||
|
||||
if checker(previous_node, real_node):
|
||||
yield node
|
||||
@@ -616,6 +657,7 @@ class MethodManager:
|
||||
MethodName.State: StateSelectorMethod,
|
||||
MethodName.Exposure: ExposureSelectorMethod,
|
||||
MethodName.Metric: MetricSelectorMethod,
|
||||
MethodName.Entity: EntitySelectorMethod,
|
||||
MethodName.Result: ResultSelectorMethod,
|
||||
MethodName.SourceStatus: SourceStatusSelectorMethod,
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ class NodeType(StrEnum):
|
||||
Macro = "macro"
|
||||
Exposure = "exposure"
|
||||
Metric = "metric"
|
||||
Entity = "entity"
|
||||
Group = "group"
|
||||
|
||||
@classmethod
|
||||
@@ -67,11 +68,14 @@ class NodeType(StrEnum):
|
||||
cls.Analysis,
|
||||
cls.Exposure,
|
||||
cls.Metric,
|
||||
cls.Entity,
|
||||
]
|
||||
|
||||
def pluralize(self) -> str:
|
||||
if self is self.Analysis:
|
||||
return "analyses"
|
||||
if self is self.Entity:
|
||||
return "entities"
|
||||
return f"{self}s"
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ from dbt.events.base_types import EventLevel
|
||||
import json
|
||||
import pprint
|
||||
|
||||
from dbt.contracts.graph.metrics import UnparsedMetricInput
|
||||
|
||||
import dbt.exceptions
|
||||
import dbt.tracking
|
||||
import dbt.utils
|
||||
@@ -62,6 +64,7 @@ from dbt.contracts.graph.nodes import (
|
||||
ColumnInfo,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
SeedNode,
|
||||
ManifestNode,
|
||||
ResultNode,
|
||||
@@ -83,6 +86,8 @@ from dbt.parser.snapshots import SnapshotParser
|
||||
from dbt.parser.sources import SourcePatcher
|
||||
from dbt.version import __version__
|
||||
|
||||
from dbt.semantic.validations.model_validator import ModelValidator
|
||||
|
||||
from dbt.dataclass_schema import StrEnum, dbtClassMixin
|
||||
|
||||
MANIFEST_FILE_NAME = "manifest.json"
|
||||
@@ -354,7 +359,7 @@ class ManifestLoader:
|
||||
project, project_parser_files[project.project_name], parser_types
|
||||
)
|
||||
|
||||
# Now that we've loaded most of the nodes (except for schema tests, sources, metrics)
|
||||
# Now that we've loaded most of the nodes (except for schema tests, sources, metrics, entities)
|
||||
# load up the Lookup objects to resolve them by name, so the SourceFiles store
|
||||
# the unique_id instead of the name. Sources are loaded from yaml files, so
|
||||
# aren't in place yet
|
||||
@@ -373,6 +378,23 @@ class ManifestLoader:
|
||||
|
||||
self.process_nodes()
|
||||
|
||||
# Validate semantic model
|
||||
# TODO: Figure out how to have this be its own area
|
||||
semantic_result = ModelValidator().validate_model(self.manifest.user_configured_model)
|
||||
if semantic_result.issues.has_blocking_issues:
|
||||
error_message = "\n".join(
|
||||
issue.as_cli_formatted_str() for issue in semantic_result.issues.errors
|
||||
)
|
||||
warning_message = "\n".join(
|
||||
issue.as_cli_formatted_str() for issue in semantic_result.issues.warnings
|
||||
)
|
||||
future_error_message = "\n".join(
|
||||
issue.as_cli_formatted_str() for issue in semantic_result.issues.future_errors
|
||||
)
|
||||
raise dbt.exceptions.DbtSemanticValidationError(
|
||||
error_message + warning_message + future_error_message
|
||||
)
|
||||
|
||||
self._perf_info.parse_project_elapsed = time.perf_counter() - start_parse_projects
|
||||
|
||||
# patch_sources converts the UnparsedSourceDefinitions in the
|
||||
@@ -390,13 +412,14 @@ class ManifestLoader:
|
||||
# copy the selectors from the root_project to the manifest
|
||||
self.manifest.selectors = self.root_project.manifest_selectors
|
||||
|
||||
# update the refs, sources, docs and metrics depends_on.nodes
|
||||
# update the refs, sources, docs, entities and metrics depends_on.nodes
|
||||
# These check the created_at time on the nodes to
|
||||
# determine whether they need processing.
|
||||
start_process = time.perf_counter()
|
||||
self.process_sources(self.root_project.project_name)
|
||||
self.process_refs(self.root_project.project_name)
|
||||
self.process_docs(self.root_project)
|
||||
self.process_entities(self.root_project)
|
||||
self.process_metrics(self.root_project)
|
||||
|
||||
# update tracking data
|
||||
@@ -863,19 +886,51 @@ class ManifestLoader:
|
||||
if exposure.created_at < self.started_at:
|
||||
continue
|
||||
_process_refs_for_exposure(self.manifest, current_project, exposure)
|
||||
for metric in self.manifest.metrics.values():
|
||||
if metric.created_at < self.started_at:
|
||||
for entity in self.manifest.entities.values():
|
||||
if entity.created_at < self.started_at:
|
||||
continue
|
||||
_process_refs_for_metric(self.manifest, current_project, metric)
|
||||
_process_refs_for_entity(self.manifest, current_project, entity)
|
||||
# Metrics can only be based on entities now
|
||||
# for metric in self.manifest.metrics.values():
|
||||
# if metric.created_at < self.started_at:
|
||||
# continue
|
||||
# _process_refs_for_metric(self.manifest, current_project, metric)
|
||||
|
||||
# Takes references in 'metrics' array of nodes and exposures, finds the target
|
||||
# TODO: Get rid of this? Entities can't reference other entities, only identifiers
|
||||
# Takes references in 'entities' array of nodes and exposures, finds the target
|
||||
# node, and updates 'depends_on.nodes' with the unique id
|
||||
def process_metrics(self, config: RuntimeConfig):
|
||||
def process_entities(self, config: RuntimeConfig):
|
||||
current_project = config.project_name
|
||||
for node in self.manifest.nodes.values():
|
||||
if node.created_at < self.started_at:
|
||||
continue
|
||||
_process_metrics_for_node(self.manifest, current_project, node)
|
||||
_process_entities_for_node(self.manifest, current_project, node)
|
||||
for entity in self.manifest.entities.values():
|
||||
if entity.created_at < self.started_at:
|
||||
continue
|
||||
_process_entities_for_node(self.manifest, current_project, entity)
|
||||
for metric in self.manifest.metrics.values():
|
||||
if metric.created_at < self.started_at:
|
||||
continue
|
||||
_process_entities_for_node(self.manifest, current_project, metric)
|
||||
for exposure in self.manifest.exposures.values():
|
||||
if exposure.created_at < self.started_at:
|
||||
continue
|
||||
_process_entities_for_node(self.manifest, current_project, exposure)
|
||||
|
||||
# Takes references in 'metrics' array of metrics and exposures, finds the target
|
||||
# node, and updates 'depends_on.nodes' with the unique id
|
||||
def process_metrics(self, config: RuntimeConfig):
|
||||
current_project = config.project_name
|
||||
# NOTE: Commenting this out as metrics can now only be built on entities
|
||||
# for node in self.manifest.nodes.values():
|
||||
# if node.created_at < self.started_at:
|
||||
# continue
|
||||
# _process_metrics_for_node(self.manifest, current_project, node)
|
||||
for entity in self.manifest.entities.values():
|
||||
if entity.created_at < self.started_at:
|
||||
continue
|
||||
_process_metrics_for_node(self.manifest, current_project, entity)
|
||||
for metric in self.manifest.metrics.values():
|
||||
# TODO: Can we do this if the metric is derived & depends on
|
||||
# some other metric for its definition? Maybe....
|
||||
@@ -942,6 +997,16 @@ class ManifestLoader:
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_metrics(ctx, metric)
|
||||
for entity in self.manifest.entities.values():
|
||||
if entity.created_at < self.started_at:
|
||||
continue
|
||||
ctx = generate_runtime_docs_context(
|
||||
config,
|
||||
entity,
|
||||
self.manifest,
|
||||
config.project_name,
|
||||
)
|
||||
_process_docs_for_entities(ctx, entity)
|
||||
|
||||
# Loops through all nodes and exposures, for each element in
|
||||
# 'sources' array finds the source node and updates the
|
||||
@@ -1155,6 +1220,10 @@ def _process_docs_for_exposure(context: Dict[str, Any], exposure: Exposure) -> N
|
||||
exposure.description = get_rendered(exposure.description, context)
|
||||
|
||||
|
||||
def _process_docs_for_entities(context: Dict[str, Any], entity: Entity) -> None:
|
||||
entity.description = get_rendered(entity.description, context)
|
||||
|
||||
|
||||
def _process_docs_for_metrics(context: Dict[str, Any], metric: Metric) -> None:
|
||||
metric.description = get_rendered(metric.description, context)
|
||||
|
||||
@@ -1203,9 +1272,53 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur
|
||||
manifest.update_exposure(exposure)
|
||||
|
||||
|
||||
def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: Metric):
|
||||
"""Given a manifest and a metric in that manifest, process its refs"""
|
||||
for ref in metric.refs:
|
||||
# NOTE: commenting out as metrics are based on entities now
|
||||
# def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: Metric):
|
||||
# """Given a manifest and a metric in that manifest, process its refs"""
|
||||
# for ref in metric.refs:
|
||||
# target_model: Optional[Union[Disabled, ManifestNode]] = None
|
||||
# target_model_name: str
|
||||
# target_model_package: Optional[str] = None
|
||||
|
||||
# if len(ref) == 1:
|
||||
# target_model_name = ref[0]
|
||||
# elif len(ref) == 2:
|
||||
# target_model_package, target_model_name = ref
|
||||
# else:
|
||||
# raise dbt.exceptions.DbtInternalError(
|
||||
# f"Refs should always be 1 or 2 arguments - got {len(ref)}"
|
||||
# )
|
||||
|
||||
# target_model = manifest.resolve_ref(
|
||||
# target_model_name,
|
||||
# target_model_package,
|
||||
# current_project,
|
||||
# metric.package_name,
|
||||
# )
|
||||
|
||||
# if target_model is None or isinstance(target_model, Disabled):
|
||||
# # This may raise. Even if it doesn't, we don't want to add
|
||||
# # this metric to the graph b/c there is no destination metric
|
||||
# metric.config.enabled = False
|
||||
# invalid_target_fail_unless_test(
|
||||
# node=metric,
|
||||
# target_name=target_model_name,
|
||||
# target_kind="node",
|
||||
# target_package=target_model_package,
|
||||
# disabled=(isinstance(target_model, Disabled)),
|
||||
# should_warn_if_disabled=False,
|
||||
# )
|
||||
# continue
|
||||
|
||||
# target_model_id = target_model.unique_id
|
||||
|
||||
# metric.depends_on.nodes.append(target_model_id)
|
||||
# manifest.update_metric(metric)
|
||||
|
||||
|
||||
def _process_refs_for_entity(manifest: Manifest, current_project: str, entity: Entity):
|
||||
"""Given a manifest and an entity in that manifest, process its refs"""
|
||||
for ref in entity.refs:
|
||||
target_model: Optional[Union[Disabled, ManifestNode]] = None
|
||||
target_model_name: str
|
||||
target_model_package: Optional[str] = None
|
||||
@@ -1223,33 +1336,32 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M
|
||||
target_model_name,
|
||||
target_model_package,
|
||||
current_project,
|
||||
metric.package_name,
|
||||
entity.package_name,
|
||||
)
|
||||
|
||||
if target_model is None or isinstance(target_model, Disabled):
|
||||
# This may raise. Even if it doesn't, we don't want to add
|
||||
# this metric to the graph b/c there is no destination metric
|
||||
metric.config.enabled = False
|
||||
# this entity to the graph b/c there is no destination entity
|
||||
entity.config.enabled = False
|
||||
invalid_target_fail_unless_test(
|
||||
node=metric,
|
||||
node=entity,
|
||||
target_name=target_model_name,
|
||||
target_kind="node",
|
||||
target_package=target_model_package,
|
||||
disabled=(isinstance(target_model, Disabled)),
|
||||
should_warn_if_disabled=False,
|
||||
)
|
||||
continue
|
||||
|
||||
target_model_id = target_model.unique_id
|
||||
|
||||
metric.depends_on.nodes.append(target_model_id)
|
||||
manifest.update_metric(metric)
|
||||
entity.depends_on.nodes.append(target_model_id)
|
||||
manifest.update_entity(entity)
|
||||
|
||||
|
||||
def _process_metrics_for_node(
|
||||
manifest: Manifest,
|
||||
current_project: str,
|
||||
node: Union[ManifestNode, Metric, Exposure],
|
||||
node: Union[ManifestNode, Metric, Exposure, Entity],
|
||||
):
|
||||
"""Given a manifest and a node in that manifest, process its metrics"""
|
||||
|
||||
@@ -1262,7 +1374,10 @@ def _process_metrics_for_node(
|
||||
target_metric_package: Optional[str] = None
|
||||
|
||||
if len(metric) == 1:
|
||||
target_metric_name = metric[0]
|
||||
if isinstance(metric[0], UnparsedMetricInput):
|
||||
target_metric_name = metric[0].name
|
||||
else:
|
||||
target_metric_name = metric[0]
|
||||
elif len(metric) == 2:
|
||||
target_metric_package, target_metric_name = metric
|
||||
else:
|
||||
@@ -1295,6 +1410,55 @@ def _process_metrics_for_node(
|
||||
node.depends_on.nodes.append(target_metric_id)
|
||||
|
||||
|
||||
def _process_entities_for_node(
|
||||
manifest: Manifest,
|
||||
current_project: str,
|
||||
node: Union[ManifestNode, Entity, Exposure, Metric],
|
||||
):
|
||||
"""Given a manifest and a node in that manifest, process its entities"""
|
||||
|
||||
if isinstance(node, SeedNode):
|
||||
return
|
||||
|
||||
for entity in node.entities:
|
||||
target_entity: Optional[Union[Disabled, Entity]] = None
|
||||
target_entity_name: str
|
||||
target_entity_package: Optional[str] = None
|
||||
|
||||
if len(entity) == 1:
|
||||
target_entity_name = entity[0]
|
||||
elif len(entity) == 2:
|
||||
target_entity_package, target_entity_name = entity
|
||||
else:
|
||||
raise dbt.exceptions.DbtInternalError(
|
||||
f"Entity references should always be 1 or 2 arguments - got {len(entity)}"
|
||||
)
|
||||
|
||||
target_entity = manifest.resolve_entity(
|
||||
target_entity_name,
|
||||
target_entity_package,
|
||||
current_project,
|
||||
node.package_name,
|
||||
)
|
||||
|
||||
if target_entity is None or isinstance(target_entity, Disabled):
|
||||
# This may raise. Even if it doesn't, we don't want to add
|
||||
# this node to the graph b/c there is no destination node
|
||||
node.config.enabled = False
|
||||
invalid_target_fail_unless_test(
|
||||
node=node,
|
||||
target_name=target_entity_name,
|
||||
target_kind="entity",
|
||||
target_package=target_entity_package,
|
||||
disabled=(isinstance(target_entity, Disabled)),
|
||||
)
|
||||
continue
|
||||
|
||||
target_entity_id = target_entity.unique_id
|
||||
|
||||
node.depends_on.nodes.append(target_entity_id)
|
||||
|
||||
|
||||
def _process_refs_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
|
||||
"""Given a manifest and a node in that manifest, process its refs"""
|
||||
|
||||
@@ -1369,27 +1533,28 @@ def _process_sources_for_exposure(manifest: Manifest, current_project: str, expo
|
||||
manifest.update_exposure(exposure)
|
||||
|
||||
|
||||
def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
|
||||
target_source: Optional[Union[Disabled, SourceDefinition]] = None
|
||||
for source_name, table_name in metric.sources:
|
||||
target_source = manifest.resolve_source(
|
||||
source_name,
|
||||
table_name,
|
||||
current_project,
|
||||
metric.package_name,
|
||||
)
|
||||
if target_source is None or isinstance(target_source, Disabled):
|
||||
metric.config.enabled = False
|
||||
invalid_target_fail_unless_test(
|
||||
node=metric,
|
||||
target_name=f"{source_name}.{table_name}",
|
||||
target_kind="source",
|
||||
disabled=(isinstance(target_source, Disabled)),
|
||||
)
|
||||
continue
|
||||
target_source_id = target_source.unique_id
|
||||
metric.depends_on.nodes.append(target_source_id)
|
||||
manifest.update_metric(metric)
|
||||
# TODO: Remove this code because metrics can't be based on sources
|
||||
# def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
|
||||
# target_source: Optional[Union[Disabled, SourceDefinition]] = None
|
||||
# for source_name, table_name in metric.sources:
|
||||
# target_source = manifest.resolve_source(
|
||||
# source_name,
|
||||
# table_name,
|
||||
# current_project,
|
||||
# metric.package_name,
|
||||
# )
|
||||
# if target_source is None or isinstance(target_source, Disabled):
|
||||
# metric.config.enabled = False
|
||||
# invalid_target_fail_unless_test(
|
||||
# node=metric,
|
||||
# target_name=f"{source_name}.{table_name}",
|
||||
# target_kind="source",
|
||||
# disabled=(isinstance(target_source, Disabled)),
|
||||
# )
|
||||
# continue
|
||||
# target_source_id = target_source.unique_id
|
||||
# metric.depends_on.nodes.append(target_source_id)
|
||||
# manifest.update_metric(metric)
|
||||
|
||||
|
||||
def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
|
||||
|
||||
@@ -242,7 +242,7 @@ class PartialParsing:
|
||||
self.remove_source_override_target(source)
|
||||
|
||||
def delete_disabled(self, unique_id, file_id):
|
||||
# This node/metric/exposure is disabled. Find it and remove it from disabled dictionary.
|
||||
# This node/metric/entity/exposure is disabled. Find it and remove it from disabled dictionary.
|
||||
for dis_index, dis_node in enumerate(self.saved_manifest.disabled[unique_id]):
|
||||
if dis_node.file_id == file_id:
|
||||
node = dis_node
|
||||
@@ -441,6 +441,18 @@ class PartialParsing:
|
||||
if metric_element:
|
||||
self.delete_schema_metric(schema_file, metric_element)
|
||||
self.merge_patch(schema_file, "metrics", metric_element)
|
||||
elif unique_id in self.saved_manifest.entities:
|
||||
entity = self.saved_manifest.entities[unique_id]
|
||||
file_id = entity.file_id
|
||||
if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
|
||||
schema_file = self.saved_files[file_id]
|
||||
entities = []
|
||||
if "entities" in schema_file.dict_from_yaml:
|
||||
entities = schema_file.dict_from_yaml["entities"]
|
||||
entity_element = self.get_schema_element(entities, entity.name)
|
||||
if entity_element:
|
||||
self.delete_schema_entity(schema_file, entity_element)
|
||||
self.merge_patch(schema_file, "entities", entity_element)
|
||||
elif unique_id in self.saved_manifest.macros:
|
||||
macro = self.saved_manifest.macros[unique_id]
|
||||
file_id = macro.file_id
|
||||
@@ -746,6 +758,29 @@ class PartialParsing:
|
||||
self.delete_schema_metric(schema_file, elem)
|
||||
self.merge_patch(schema_file, dict_key, elem)
|
||||
|
||||
# entities
|
||||
dict_key = "entities"
|
||||
entity_diff = self.get_diff_for("entities", saved_yaml_dict, new_yaml_dict)
|
||||
if entity_diff["changed"]:
|
||||
for entity in entity_diff["changed"]:
|
||||
self.delete_schema_entity(schema_file, entity)
|
||||
self.merge_patch(schema_file, dict_key, entity)
|
||||
if entity_diff["deleted"]:
|
||||
for entity in entity_diff["deleted"]:
|
||||
self.delete_schema_entity(schema_file, entity)
|
||||
if entity_diff["added"]:
|
||||
for entity in entity_diff["added"]:
|
||||
self.merge_patch(schema_file, dict_key, entity)
|
||||
# Handle schema file updates due to env_var changes
|
||||
if dict_key in env_var_changes and dict_key in new_yaml_dict:
|
||||
for name in env_var_changes[dict_key]:
|
||||
if name in entity_diff["changed_or_deleted_names"]:
|
||||
continue
|
||||
elem = self.get_schema_element(new_yaml_dict[dict_key], name)
|
||||
if elem:
|
||||
self.delete_schema_entity(schema_file, elem)
|
||||
self.merge_patch(schema_file, dict_key, elem)
|
||||
|
||||
# groups
|
||||
dict_key = "groups"
|
||||
group_diff = self.get_diff_for("groups", saved_yaml_dict, new_yaml_dict)
|
||||
@@ -958,6 +993,24 @@ class PartialParsing:
|
||||
elif unique_id in self.saved_manifest.disabled:
|
||||
self.delete_disabled(unique_id, schema_file.file_id)
|
||||
|
||||
# entities are created only from schema files, but also can be referred to by other nodes
|
||||
def delete_schema_entity(self, schema_file, entity_dict):
|
||||
entity_name = entity_dict["name"]
|
||||
entities = schema_file.entities.copy()
|
||||
for unique_id in entities:
|
||||
if unique_id in self.saved_manifest.entities:
|
||||
entity = self.saved_manifest.entities[unique_id]
|
||||
if entity.name == entity_name:
|
||||
# Need to find everything that referenced this entity and schedule for parsing
|
||||
if unique_id in self.saved_manifest.child_map:
|
||||
self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
|
||||
self.deleted_manifest.entities[unique_id] = self.saved_manifest.entities.pop(
|
||||
unique_id
|
||||
)
|
||||
schema_file.entities.remove(unique_id)
|
||||
elif unique_id in self.saved_manifest.disabled:
|
||||
self.delete_disabled(unique_id, schema_file.file_id)
|
||||
|
||||
def get_schema_element(self, elem_list, elem_name):
|
||||
for element in elem_list:
|
||||
if "name" in element and element["name"] == elem_name:
|
||||
|
||||
@@ -21,11 +21,12 @@ from dbt.context.configured import generate_schema_yml_context, SchemaYamlVars
|
||||
from dbt.context.providers import (
|
||||
generate_parse_exposure,
|
||||
generate_parse_metrics,
|
||||
generate_parse_entities,
|
||||
generate_test_context,
|
||||
)
|
||||
from dbt.context.macro_resolver import MacroResolver
|
||||
from dbt.contracts.files import FileHash, SchemaSourceFile
|
||||
from dbt.contracts.graph.model_config import MetricConfig, ExposureConfig
|
||||
from dbt.contracts.graph.model_config import MetricConfig, ExposureConfig, EntityConfig
|
||||
from dbt.contracts.graph.nodes import (
|
||||
ParsedNodePatch,
|
||||
ColumnInfo,
|
||||
@@ -34,6 +35,7 @@ from dbt.contracts.graph.nodes import (
|
||||
UnpatchedSourceDefinition,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
Group,
|
||||
ManifestNode,
|
||||
GraphMemberNode,
|
||||
@@ -49,9 +51,11 @@ from dbt.contracts.graph.unparsed import (
|
||||
UnparsedNodeUpdate,
|
||||
UnparsedExposure,
|
||||
UnparsedMetric,
|
||||
UnparsedEntity,
|
||||
UnparsedSourceDefinition,
|
||||
UnparsedGroup,
|
||||
)
|
||||
from dbt.contracts.graph.measures import Measure
|
||||
from dbt.exceptions import (
|
||||
CompilationError,
|
||||
DuplicateMacroPatchNameError,
|
||||
@@ -84,6 +88,27 @@ from dbt.parser.generic_test_builders import (
|
||||
Testable,
|
||||
)
|
||||
from dbt.utils import get_pseudo_test_path, coerce_dict_str, md5
|
||||
from dbt.semantic.transformations.entity_transformations.boolean_measure_aggregation import (
|
||||
BooleanMeasureAggregation,
|
||||
)
|
||||
from dbt.semantic.transformations.entity_transformations.composite_identifier_expressions import (
|
||||
CompositeIdentifierExpressionRule,
|
||||
)
|
||||
from dbt.semantic.transformations.entity_transformations.convert_count import ConvertCountToSum
|
||||
from dbt.semantic.transformations.entity_transformations.convert_median import (
|
||||
ConvertMedianToPercentile,
|
||||
)
|
||||
from dbt.semantic.transformations.entity_transformations.lowercase_names import LowerCaseNames
|
||||
from dbt.semantic.transformations.entity_transformations.measure_aggregation_time_dimension import (
|
||||
SetMeasureAggregationTimeDimension,
|
||||
)
|
||||
from dbt.semantic.transformations.entity_transformations.proxy_measure import ProxyMeasure
|
||||
from dbt.semantic.transformations.metric_transformations.add_input_metric_measures import (
|
||||
AddInputMetricMeasures,
|
||||
)
|
||||
from dbt.semantic.transformations.metric_transformations.convert_type_params import (
|
||||
ConvertTypeParams,
|
||||
)
|
||||
|
||||
|
||||
TestDef = Union[str, Dict[str, Any]]
|
||||
@@ -97,6 +122,7 @@ schema_file_keys = (
|
||||
"analyses",
|
||||
"exposures",
|
||||
"metrics",
|
||||
"entities",
|
||||
)
|
||||
|
||||
|
||||
@@ -117,6 +143,7 @@ class ParserRef:
|
||||
def __init__(self):
|
||||
self.column_info: Dict[str, ColumnInfo] = {}
|
||||
|
||||
# TODO: Mimic this for dimension information at the entity level
|
||||
def add(
|
||||
self,
|
||||
column: Union[HasDocs, UnparsedColumn],
|
||||
@@ -517,7 +544,6 @@ class SchemaParser(SimpleParser[GenericTestBlock, GenericTestNode]):
|
||||
yaml_block = YamlBlock.from_file_block(block, dct)
|
||||
|
||||
parser: YamlDocsReader
|
||||
|
||||
# There are 7 kinds of parsers:
|
||||
# Model, Seed, Snapshot, Source, Macro, Analysis, Exposures
|
||||
|
||||
@@ -564,10 +590,16 @@ class SchemaParser(SimpleParser[GenericTestBlock, GenericTestNode]):
|
||||
exp_parser = ExposureParser(self, yaml_block)
|
||||
exp_parser.parse()
|
||||
|
||||
# parse entities
|
||||
if "entities" in dct:
|
||||
entity_parser = EntityParser(self, yaml_block)
|
||||
entity_parser.parse()
|
||||
|
||||
# parse metrics
|
||||
if "metrics" in dct:
|
||||
metric_parser = MetricParser(self, yaml_block)
|
||||
metric_parser.parse()
|
||||
metric_parser.transform()
|
||||
|
||||
# parse groups
|
||||
if "groups" in dct:
|
||||
@@ -1129,15 +1161,17 @@ class ExposureParser(YamlReader):
|
||||
config=config,
|
||||
unrendered_config=unrendered_config,
|
||||
)
|
||||
|
||||
ctx = generate_parse_exposure(
|
||||
parsed,
|
||||
self.root_project,
|
||||
self.schema_parser.manifest,
|
||||
package_name,
|
||||
)
|
||||
|
||||
depends_on_jinja = "\n".join("{{ " + line + "}}" for line in unparsed.depends_on)
|
||||
get_rendered(depends_on_jinja, ctx, parsed, capture_macros=True)
|
||||
# parsed now has a populated refs/sources/metrics
|
||||
# parsed now has a populated refs/sources/metrics/entities
|
||||
|
||||
if parsed.config.enabled:
|
||||
self.manifest.add_exposure(self.yaml.file, parsed)
|
||||
@@ -1178,6 +1212,149 @@ class ExposureParser(YamlReader):
|
||||
self.parse_exposure(unparsed)
|
||||
|
||||
|
||||
class EntityParser(YamlReader):
|
||||
def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):
|
||||
super().__init__(schema_parser, yaml, NodeType.Entity.pluralize())
|
||||
self.schema_parser = schema_parser
|
||||
self.yaml = yaml
|
||||
|
||||
def parse_entity(self, unparsed: UnparsedEntity):
|
||||
package_name = self.project.project_name
|
||||
unique_id = f"{NodeType.Entity}.{package_name}.{unparsed.name}"
|
||||
entity_model_name = unparsed.model.replace('"', "'").split("'")[1]
|
||||
model_key = f"model.{package_name}.{entity_model_name}"
|
||||
path = self.yaml.path.relative_path
|
||||
|
||||
fqn = self.schema_parser.get_fqn_prefix(path)
|
||||
fqn.append(unparsed.name)
|
||||
|
||||
config = self._generate_entity_config(
|
||||
target=unparsed,
|
||||
fqn=fqn,
|
||||
package_name=package_name,
|
||||
rendered=True,
|
||||
)
|
||||
|
||||
config = config.finalize_and_validate()
|
||||
|
||||
unrendered_config = self._generate_entity_config(
|
||||
target=unparsed,
|
||||
fqn=fqn,
|
||||
package_name=package_name,
|
||||
rendered=False,
|
||||
)
|
||||
|
||||
if not isinstance(config, EntityConfig):
|
||||
raise DbtInternalError(
|
||||
f"Calculated a {type(config)} for an entity, but expected a EntityConfig"
|
||||
)
|
||||
|
||||
parsed = Entity(
|
||||
resource_type=NodeType.Entity,
|
||||
package_name=package_name,
|
||||
path=path,
|
||||
original_file_path=self.yaml.path.original_file_path,
|
||||
unique_id=unique_id,
|
||||
fqn=fqn,
|
||||
model=unparsed.model,
|
||||
sql_table=self.schema_parser.manifest.nodes[model_key].relation_name,
|
||||
name=unparsed.name,
|
||||
description=unparsed.description,
|
||||
identifiers=unparsed.identifiers,
|
||||
dimensions=unparsed.dimensions,
|
||||
measures=unparsed.measures,
|
||||
origin=unparsed.origin,
|
||||
meta=unparsed.meta,
|
||||
tags=unparsed.tags,
|
||||
config=config,
|
||||
unrendered_config=unrendered_config,
|
||||
)
|
||||
|
||||
parsed = ConvertCountToSum._transform_entity(entity=parsed)
|
||||
parsed = LowerCaseNames._transform_entity(entity=parsed)
|
||||
parsed = CompositeIdentifierExpressionRule._transform_entity(entity=parsed)
|
||||
parsed = ConvertMedianToPercentile._transform_entity(entity=parsed)
|
||||
parsed = BooleanMeasureAggregation._transform_entity(entity=parsed)
|
||||
parsed = SetMeasureAggregationTimeDimension._transform_entity(entity=parsed)
|
||||
|
||||
ctx = generate_parse_entities(
|
||||
entity=parsed,
|
||||
config=self.root_project,
|
||||
manifest=self.schema_parser.manifest,
|
||||
package_name=package_name,
|
||||
)
|
||||
|
||||
self = ProxyMeasure._create_proxy_metrics(self, parsed_entity=parsed, path=path, fqn=fqn)
|
||||
|
||||
if parsed.model is not None:
|
||||
model_ref = "{{ " + parsed.model + " }}"
|
||||
get_rendered(model_ref, ctx, parsed)
|
||||
|
||||
# if the entity is disabled we do not want it included in the manifest, only in the disabled dict
|
||||
if parsed.config.enabled:
|
||||
# self.manifest.add_metric(self.yaml.file, parsed)
|
||||
self.manifest.add_entity(self.yaml.file, parsed)
|
||||
else:
|
||||
self.manifest.add_disabled(self.yaml.file, parsed)
|
||||
|
||||
def _generate_entity_config(
|
||||
self, target: UnparsedEntity, fqn: List[str], package_name: str, rendered: bool
|
||||
):
|
||||
generator: BaseContextConfigGenerator
|
||||
if rendered:
|
||||
generator = ContextConfigGenerator(self.root_project)
|
||||
else:
|
||||
generator = UnrenderedConfigGenerator(self.root_project)
|
||||
|
||||
# configs with precedence set
|
||||
precedence_configs = dict()
|
||||
# first apply entity configs
|
||||
precedence_configs.update(target.config)
|
||||
|
||||
return generator.calculate_node_config(
|
||||
config_call_dict={},
|
||||
fqn=fqn,
|
||||
resource_type=NodeType.Entity,
|
||||
project_name=package_name,
|
||||
base=False,
|
||||
patch_config_dict=precedence_configs,
|
||||
)
|
||||
|
||||
def _generate_proxy_metric_config(
|
||||
self, target: Measure, fqn: List[str], package_name: str, rendered: bool
|
||||
):
|
||||
generator: BaseContextConfigGenerator
|
||||
if rendered:
|
||||
generator = ContextConfigGenerator(self.root_project)
|
||||
else:
|
||||
generator = UnrenderedConfigGenerator(self.root_project)
|
||||
|
||||
# configs with precedence set
|
||||
precedence_configs = dict()
|
||||
|
||||
# first apply metric configs
|
||||
precedence_configs.update(target.config)
|
||||
|
||||
return generator.calculate_node_config(
|
||||
config_call_dict={},
|
||||
fqn=fqn,
|
||||
resource_type=NodeType.Metric,
|
||||
project_name=package_name,
|
||||
base=False,
|
||||
patch_config_dict=precedence_configs,
|
||||
)
|
||||
|
||||
def parse(self):
|
||||
for data in self.get_key_dicts():
|
||||
try:
|
||||
UnparsedEntity.validate(data)
|
||||
unparsed = UnparsedEntity.from_dict(data)
|
||||
|
||||
except (ValidationError, JSONValidationError) as exc:
|
||||
raise YamlParseDictError(self.yaml.path, self.key, data, exc)
|
||||
self.parse_entity(unparsed)
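For context on parse_entity above: the model field is expected to be a ref() expression, and the parser pulls the model name out of the quotes to build the model.<package>.<name> lookup key. A small sketch of that string handling with example values (project and model names are made up):
package_name = "jaffle_shop"
model_field = 'ref("dim_customers")'     # as written in the entity YAML
entity_model_name = model_field.replace('"', "'").split("'")[1]
model_key = f"model.{package_name}.{entity_model_name}"

assert entity_model_name == "dim_customers"
assert model_key == "model.jaffle_shop.dim_customers"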
|
||||
|
||||
|
||||
class MetricParser(YamlReader):
|
||||
def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):
|
||||
super().__init__(schema_parser, yaml, NodeType.Metric.pluralize())
|
||||
@@ -1208,11 +1385,16 @@ class MetricParser(YamlReader):
|
||||
rendered=False,
|
||||
)
|
||||
|
||||
parsed_metric_type_params = ConvertTypeParams._get_metric_type_params(unparsed.type_params)
|
||||
|
||||
if not isinstance(config, MetricConfig):
|
||||
raise DbtInternalError(
|
||||
f"Calculated a {type(config)} for a metric, but expected a MetricConfig"
|
||||
)
|
||||
|
||||
# Given that derived metrics now come from type params, CM added a new way
|
||||
# of defining that relationship in the metrics property, which is needed for exposures
|
||||
|
||||
parsed = Metric(
|
||||
resource_type=NodeType.Metric,
|
||||
package_name=package_name,
|
||||
@@ -1220,17 +1402,13 @@ class MetricParser(YamlReader):
            original_file_path=self.yaml.path.original_file_path,
            unique_id=unique_id,
            fqn=fqn,
            model=unparsed.model,
            name=unparsed.name,
            entity=unparsed.entity,
            description=unparsed.description,
            label=unparsed.label,
            calculation_method=unparsed.calculation_method,
            expression=str(unparsed.expression),
            timestamp=unparsed.timestamp,
            dimensions=unparsed.dimensions,
            window=unparsed.window,
            time_grains=unparsed.time_grains,
            filters=unparsed.filters,
            type=unparsed.type,
            type_params=parsed_metric_type_params,
            constraint=unparsed.constraint,
            metrics=[[metric.name] for metric in parsed_metric_type_params.metrics],
            meta=unparsed.meta,
            tags=unparsed.tags,
            config=config,
@@ -1244,15 +1422,10 @@ class MetricParser(YamlReader):
            package_name,
        )

        if parsed.model is not None:
            model_ref = "{{ " + parsed.model + " }}"
            get_rendered(model_ref, ctx, parsed)

        parsed.expression = get_rendered(
            parsed.expression,
            ctx,
            node=parsed,
        )
        if parsed.entity is not None:
            entity_ref = "{{ " + parsed.entity + " }}"
            # rendering the entity reference is the step that adds the dependencies
            get_rendered(entity_ref, ctx, parsed)

        # if the metric is disabled we do not want it included in the manifest, only in the disabled dict
        if parsed.config.enabled:
@@ -1293,6 +1466,15 @@ class MetricParser(YamlReader):
                raise YamlParseDictError(self.yaml.path, self.key, data, exc)
            self.parse_metric(unparsed)

    def transform(self):
        # We resolve the input measures for derived metrics here because
        # all of the metrics need to have been parsed first.
        metrics = list(self.manifest.metrics.values())
        for metric in metrics:
            metric = AddInputMetricMeasures.add_input_metrics(metric=metric, metrics=metrics)
            # self.update_metric


class GroupParser(YamlReader):
    def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):

80
core/dbt/semantic/aggregation_properties.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
|
||||
|
||||
class AggregationType(StrEnum):
|
||||
"""Aggregation methods for measures"""
|
||||
|
||||
SUM = "sum"
|
||||
MIN = "min"
|
||||
MAX = "max"
|
||||
COUNT = "count"
|
||||
COUNT_DISTINCT = "count_distinct"
|
||||
SUM_BOOLEAN = "sum_boolean"
|
||||
AVERAGE = "average"
|
||||
PERCENTILE = "percentile"
|
||||
MEDIAN = "median"
|
||||
|
||||
@property
|
||||
def is_additive(self) -> bool:
|
||||
"""Indicates that if you sum values over a dimension grouping, you will still get an accurate result for this metric."""
|
||||
if (
|
||||
self is AggregationType.SUM
|
||||
or self is AggregationType.SUM_BOOLEAN
|
||||
or self is AggregationType.COUNT
|
||||
):
|
||||
return True
|
||||
elif (
|
||||
self is AggregationType.MIN
|
||||
or self is AggregationType.MAX
|
||||
or self is AggregationType.COUNT_DISTINCT
|
||||
or self is AggregationType.SUM_BOOLEAN
|
||||
or self is AggregationType.AVERAGE
|
||||
or self is AggregationType.PERCENTILE
|
||||
or self is AggregationType.MEDIAN
|
||||
):
|
||||
return False
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
@property
|
||||
def is_expansive(self) -> bool:
|
||||
"""Expansive ≝ Op( X ∪ Y ∪ ...) = Op( Op(X) ∪ Op(Y) ∪ ...)
|
||||
NOTE: COUNT is only expansive because it's transformed into a SUM agg during model transformation
|
||||
"""
|
||||
return self in (
|
||||
AggregationType.SUM,
|
||||
AggregationType.MIN,
|
||||
AggregationType.MAX,
|
||||
AggregationType.SUM_BOOLEAN,
|
||||
AggregationType.COUNT,
|
||||
)
|
||||
|
||||
@property
|
||||
def fill_nulls_with_0(self) -> bool:
|
||||
"""Indicates if charts should show 0 instead of null where there are gaps in data."""
|
||||
return self in (
|
||||
AggregationType.SUM,
|
||||
AggregationType.COUNT_DISTINCT,
|
||||
AggregationType.SUM_BOOLEAN,
|
||||
AggregationType.COUNT,
|
||||
)
|
||||
|
||||
@property
|
||||
def can_limit_dimension_values(self) -> bool:
|
||||
"""Indicates if we can limit dimension values in charts.
|
||||
Currently, this means:
|
||||
1. The dimensions we care about most are the ones with the highest numeric values
|
||||
2. We can calculate the "other" column in the postprocessor (meaning the metric is expansive)
|
||||
"""
|
||||
return self in (AggregationType.SUM, AggregationType.SUM_BOOLEAN, AggregationType.COUNT)
|
||||
|
||||
|
||||
class AggregationState(StrEnum):
|
||||
"""Represents how the measure is aggregated."""
|
||||
|
||||
NON_AGGREGATED = "NON_AGGREGATED"
|
||||
PARTIAL = "PARTIAL"
|
||||
COMPLETE = "COMPLETE"
|
||||
|
||||
def __repr__(self) -> str: # noqa: D
|
||||
return f"{self.__class__.__name__}.{self.name}"
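
A minimal usage sketch of AggregationType (illustrative only; it assumes the file above is importable as dbt.semantic.aggregation_properties):

from dbt.semantic.aggregation_properties import AggregationType

agg = AggregationType("count_distinct")   # enum values map user-supplied strings to members
assert agg is AggregationType.COUNT_DISTINCT
assert agg.is_additive is False           # distinct counts cannot be re-summed safely
assert AggregationType.SUM.is_expansive   # SUM(X ∪ Y) == SUM(SUM(X) ∪ SUM(Y))
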
142
core/dbt/semantic/constraints.py
Normal file
@@ -0,0 +1,142 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List, Dict, Any
|
||||
from dataclasses import dataclass, field
|
||||
from dbt.contracts.util import Mergeable
|
||||
|
||||
from mo_sql_parsing import parse as mo_parse
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
from dbt.exceptions import DbtSemanticValidationError
|
||||
from dbt.semantic.sql_bind_parameters import SqlBindParameters
|
||||
|
||||
LITERAL_STR = "literal"
|
||||
INTERVAL_LITERAL = "interval"
|
||||
|
||||
|
||||
@dataclass
|
||||
class WhereClauseConstraint(dbtClassMixin, Mergeable):
|
||||
"""Contains a string that is a where clause"""
|
||||
|
||||
where_clause: str = ""
|
||||
linkable_names: List[str] = field(default_factory=list)
|
||||
sql_params: SqlBindParameters = SqlBindParameters()
|
||||
|
||||
# def __init__( # noqa: D
|
||||
# self,
|
||||
# where_clause: str = "",
|
||||
# linkable_names: Optional[List[str]] = None,
|
||||
# sql_params: Optional[SqlBindParameters] = None,
|
||||
# # sql params: user-originated sql params that need to be escaped in a dialect-specific way keys are the
|
||||
# # name of the template value in the `where` string, values are the string to be escaped and
|
||||
# # inserted into the where string (ie where = "%(1)s", sql_values = {"1": "cote d'ivoire"})
|
||||
# ) -> WhereClauseConstraint:
|
||||
# where_clause = where_clause.strip("\n") if where_clause else ""
|
||||
# linkable_names = linkable_names or []
|
||||
# if sql_params is None:
|
||||
# sql_params = SqlBindParameters()
|
||||
# super().__init__(
|
||||
# where_clause=where_clause,
|
||||
# linkable_names=linkable_names,
|
||||
# sql_params=sql_params,
|
||||
# )
|
||||
|
||||
@staticmethod
|
||||
def parse(s: str) -> WhereClauseConstraint:
|
||||
"""Parse a string into a WhereClauseConstraint
|
||||
|
||||
We assume that a string being parsed carries no bind parameters; if it did, the string
would already have structure and we wouldn't need to parse it.
|
||||
"""
|
||||
s = strip_where_clause(s)
|
||||
|
||||
where_clause_str = f"WHERE {s}"
|
||||
# to piggyback on moz sql parser we need a SELECT statement
|
||||
# moz breaks the sql statement into clauses:
|
||||
# where_clause_str = "WHERE is_instant" yields -> {'select': {'value': '_'}, 'from': '_', 'where': 'is_instant'}
|
||||
# where_clause_str = "WHERE is_instant AND country = 'vanuatu' AND is_lux or ds < '2020-01-02'" yields ->
|
||||
# {'select': {'value': '_'}, 'from': '_', 'where_clause': {'or': [{'and': ['is_instant', {'eq': ['country', {'literal': 'vanuatu'}]}, 'is_lux']}, {'lt': ['ds', {'literal': '2020-01-02'}]}]}}
|
||||
parsed = mo_parse(f"select _ from _ {where_clause_str}")
|
||||
if "where" not in parsed:
|
||||
raise DbtSemanticValidationError(parsed)
|
||||
|
||||
where_clause = parsed["where"]
|
||||
|
||||
if isinstance(where_clause, dict):
|
||||
if not len(where_clause.keys()) == 1:
|
||||
raise DbtSemanticValidationError(
|
||||
f"expected parsed constraint to contain exactly one key; got {where_clause}"
|
||||
)
|
||||
return WhereClauseConstraint(
|
||||
where_clause=s,
|
||||
linkable_names=constraint_dimension_names_from_dict(where_clause),
|
||||
sql_params=SqlBindParameters(),
|
||||
)
|
||||
elif isinstance(where_clause, str):
|
||||
return WhereClauseConstraint(
|
||||
where_clause=s,
|
||||
linkable_names=[where_clause.strip()],
|
||||
sql_params=SqlBindParameters(),
|
||||
)
|
||||
else:
|
||||
raise TypeError(
|
||||
f"where-clause is neither a dict nor a string. Unexpectedly it is a {type(where_clause)}"
|
||||
)
|
||||
|
||||
def __repr__(self) -> str: # noqa: D
|
||||
return (
|
||||
f"{self.__class__.__name__}"
|
||||
f"(where_clause={self.where_clause}, linkable_names={self.linkable_names})"
|
||||
)
|
||||
|
||||
|
||||
def strip_where_clause(s: str) -> str:
|
||||
"""Removes WHERE from the beginning of the string, if present (regardless of case)"""
|
||||
# '^' tells the regex to only check the beginning of the string
|
||||
return re.sub("^where ", "", s, flags=re.IGNORECASE)
|
||||
|
||||
|
||||
def constraint_dimension_names_from_dict(where_clause: Dict[str, Any]) -> List[str]: # type: ignore[misc] # noqa: D
|
||||
dims = []
|
||||
for key, clause in where_clause.items():
|
||||
if key == LITERAL_STR or key == INTERVAL_LITERAL:
|
||||
continue
|
||||
dims += _get_dimensions_from_clause(clause)
|
||||
|
||||
return dims
|
||||
|
||||
|
||||
def constraint_values_from_dict(where_clause: Dict[str, Any]) -> List[str]: # type: ignore[misc] # noqa: d
|
||||
values = []
|
||||
for key, clause in where_clause.items():
|
||||
if key == LITERAL_STR:
|
||||
values.append(clause)
|
||||
elif isinstance(clause, dict):
|
||||
values += constraint_values_from_dict(clause)
|
||||
elif isinstance(clause, list):
|
||||
for item in clause:
|
||||
if isinstance(item, dict):
|
||||
values += constraint_values_from_dict(item)
|
||||
|
||||
return values
|
||||
|
||||
|
||||
def _constraint_dimensions_from_list(list_clause: List[Any]) -> List[str]: # type: ignore[misc] # noqa: D
|
||||
dims = []
|
||||
for clause in list_clause:
|
||||
dims += _get_dimensions_from_clause(clause)
|
||||
|
||||
return dims
|
||||
|
||||
|
||||
def _get_dimensions_from_clause(clause: Any) -> List[str]: # type: ignore[misc] # noqa: D
|
||||
if clause is not None:
|
||||
if isinstance(clause, dict):
|
||||
return constraint_dimension_names_from_dict(clause)
|
||||
elif isinstance(clause, list):
|
||||
return _constraint_dimensions_from_list(clause)
|
||||
elif isinstance(clause, str):
|
||||
return [clause.strip()]
|
||||
|
||||
return []
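
A minimal sketch of WhereClauseConstraint.parse (illustrative only; it assumes mo-sql-parsing is installed and the file above is importable as dbt.semantic.constraints):

from dbt.semantic.constraints import WhereClauseConstraint

constraint = WhereClauseConstraint.parse("WHERE is_instant AND country = 'vanuatu'")
print(constraint.where_clause)    # "is_instant AND country = 'vanuatu'" (leading WHERE stripped)
print(constraint.linkable_names)  # referenced dimension names, e.g. ['is_instant', 'country']
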
278
core/dbt/semantic/object_utils.py
Normal file
@@ -0,0 +1,278 @@
|
||||
from __future__ import annotations
|
||||
import pprint
|
||||
import textwrap
|
||||
import itertools
|
||||
import random
|
||||
import string
|
||||
from collections import OrderedDict, defaultdict, deque
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import is_dataclass, fields
|
||||
import datetime
|
||||
from hashlib import sha1
|
||||
from typing import Sequence, TypeVar, Tuple, NoReturn, Union
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
|
||||
def assert_values_exhausted(value: NoReturn) -> NoReturn:
|
||||
"""Helper method to allow MyPy to guarantee an exhaustive switch through an enumeration or literal
|
||||
|
||||
DO NOT MODIFY THE TYPE SIGNATURE OF THIS FUNCTION UNLESS MYPY CHANGES HOW IT HANDLES THINGS
|
||||
|
||||
To use this function correctly you MUST do an exhaustive switch through ALL values, using `is` for comparison
|
||||
(doing x == SomeEnum.VALUE will not work, nor will `x in (SomeEnum.VALUE_1, SomeEnum.VALUE_2)`).
|
||||
|
||||
If mypy raises an error of the form:
|
||||
`x has incompatible type SomeEnum; expected NoReturn`
|
||||
the switch is not constructed correctly. Fix your switch statement to use `is` for all comparisons.
|
||||
|
||||
If mypy raises an error of the form
|
||||
`x has incompatible type Union[Literal...]` expected NoReturn`
|
||||
the switch statement is non-exhaustive, and the values listed in the error message need to be accounted for.
|
||||
|
||||
See https://mypy.readthedocs.io/en/stable/literal_types.html#exhaustiveness-checks
|
||||
For an enum example, see issue:
|
||||
https://github.com/python/mypy/issues/6366#issuecomment-560369716
|
||||
"""
|
||||
assert False, f"Should be unreachable, but got {value}"
|
||||
|
||||
|
||||
def assert_exactly_one_arg_set(**kwargs) -> None: # type: ignore
|
||||
"""Throws an assertion error if 0 or more than 1 argument is not None."""
|
||||
num_set = 0
|
||||
for value in kwargs.values():
|
||||
if value is not None:
|
||||
num_set += 1
|
||||
|
||||
assert num_set == 1, f"{num_set} argument(s) set instead of 1 in arguments: {kwargs}"
|
||||
|
||||
|
||||
def is_hashable_base_model(obj): # type:ignore # noqa: D
|
||||
return isinstance(obj, dbtClassMixin)
|
||||
|
||||
|
||||
def _to_pretty_printable_object(obj): # type: ignore
|
||||
"""Convert the object that will look nicely when fed into the PrettyPrinter.
|
||||
|
||||
Main change is that dataclasses will have a field with the class name. In Python 3.10, the pretty printer class will
|
||||
support dataclasses, so we can remove this once we're on 3.10. Also tried the prettyprint package with dataclasses,
|
||||
but that prints full names for the classes e.g. a.b.MyClass and it also always added line breaks, even if an object
|
||||
could fit on one line, so preferred to not use that.
|
||||
|
||||
e.g.
|
||||
metricflow.specs.DimensionSpec(
|
||||
name='country',
|
||||
identifier_links=()
|
||||
),
|
||||
|
||||
Instead, the below will print something like:
|
||||
|
||||
{'class': 'DimensionSpec',
|
||||
'name': 'country_latest',
|
||||
'identifier_links': ({'class': 'IdentifierSpec',
|
||||
'name': 'listing',
|
||||
'identifier_links': ()},)}
|
||||
"""
|
||||
if obj is None:
|
||||
return None
|
||||
|
||||
elif isinstance(obj, (str, int, float)):
|
||||
return obj
|
||||
|
||||
elif isinstance(obj, (list, tuple)):
|
||||
result = []
|
||||
for item in obj:
|
||||
result.append(_to_pretty_printable_object(item))
|
||||
|
||||
if isinstance(obj, list):
|
||||
return result
|
||||
elif isinstance(obj, tuple):
|
||||
return tuple(result)
|
||||
|
||||
assert False
|
||||
|
||||
elif isinstance(obj, Mapping):
|
||||
result = {}
|
||||
for key, value in obj.items():
|
||||
result[_to_pretty_printable_object(key)] = _to_pretty_printable_object(value)
|
||||
return result
|
||||
|
||||
elif is_dataclass(obj):
|
||||
result = {"class": type(obj).__name__}
|
||||
|
||||
for field in fields(obj):
|
||||
result[field.name] = _to_pretty_printable_object(getattr(obj, field.name))
|
||||
return result
|
||||
elif is_hashable_base_model(obj):
|
||||
result = {"class": type(obj).__name__}
|
||||
|
||||
for field_name, value in obj.dict().items():
|
||||
result[field_name] = _to_pretty_printable_object(value)
|
||||
return result
|
||||
|
||||
# Can't make it more pretty.
|
||||
return obj
|
||||
|
||||
|
||||
def pretty_format(obj) -> str: # type: ignore
|
||||
"""Return the object as a string that looks pretty."""
|
||||
if isinstance(obj, str):
|
||||
return obj
|
||||
return pprint.pformat(_to_pretty_printable_object(obj), width=80, sort_dicts=False)
|
||||
|
||||
|
||||
def pformat_big_objects(*args, **kwargs) -> str: # type: ignore
|
||||
"""Prints a series of objects with many fields in a pretty way.
|
||||
|
||||
See _to_pretty_printable_object() for more context on this format. Looks like:
|
||||
|
||||
measure_recipe:
|
||||
{'class': 'MeasureRecipe',
|
||||
'measure_node': ReadSqlSourceNode(node_id=rss_140),
|
||||
'required_local_linkable_specs': ({'class': 'DimensionSpec',
|
||||
'name': 'is_instant',
|
||||
'identifier_links': ()},),
|
||||
'join_linkable_instances_recipes': ()}
|
||||
|
||||
"""
|
||||
items = []
|
||||
for arg in args:
|
||||
items.append(pretty_format(arg))
|
||||
for key, value in kwargs.items():
|
||||
items.append(f"{key}:")
|
||||
items.append(textwrap.indent(pretty_format(value), prefix=" "))
|
||||
return "\n".join(items)
|
||||
|
||||
|
||||
SequenceT = TypeVar("SequenceT")
|
||||
|
||||
|
||||
def flatten_nested_sequence(
|
||||
sequence_of_sequences: Sequence[Sequence[SequenceT]],
|
||||
) -> Tuple[SequenceT, ...]:
|
||||
"""Convert a nested sequence into a flattened tuple.
|
||||
|
||||
e.g. ((1,2), (3,4)) -> (1, 2, 3, 4)
|
||||
"""
|
||||
return tuple(itertools.chain.from_iterable(sequence_of_sequences))
|
||||
|
||||
|
||||
def flatten_and_dedupe(
|
||||
sequence_of_sequences: Sequence[Sequence[SequenceT]],
|
||||
) -> Tuple[SequenceT, ...]:
|
||||
"""Convert a nested sequence into a flattened tuple, with de-duping.
|
||||
|
||||
e.g. ((1,2), (2,3)) -> (1, 2, 3)
|
||||
"""
|
||||
items = flatten_nested_sequence(sequence_of_sequences)
|
||||
return tuple(OrderedDict.fromkeys(items))
|
||||
|
||||
|
||||
def random_id() -> str:
|
||||
"""Generates an 8-digit random alphanumeric string."""
|
||||
alphabet = string.ascii_lowercase + string.digits
|
||||
# Characters that go below the line are visually unappealing, so don't use those.
|
||||
filtered_alphabet = [x for x in alphabet if x not in "gjpqy"]
|
||||
return "".join(random.choices(filtered_alphabet, k=8))
|
||||
|
||||
|
||||
def hash_items(items: Sequence[SqlColumnType]) -> str:
|
||||
"""Produces a hash from a list of strings."""
|
||||
hash_builder = sha1()
|
||||
for item in items:
|
||||
hash_builder.update(str(item).encode("utf-8"))
|
||||
return hash_builder.hexdigest()
|
||||
|
||||
|
||||
SqlColumnType = Union[str, int, float, datetime.datetime, datetime.date, bool]
|
||||
|
||||
|
||||
class iter_bucket:
|
||||
"""Wrap *iterable* and return an object that buckets it iterable into
|
||||
child iterables based on a *key* function.
|
||||
>>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
|
||||
>>> s = iter_bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
|
||||
>>> sorted(list(s)) # Get the keys
|
||||
['a', 'b', 'c']
|
||||
>>> a_iterable = s['a']
|
||||
>>> next(a_iterable)
|
||||
'a1'
|
||||
>>> next(a_iterable)
|
||||
'a2'
|
||||
>>> list(s['b'])
|
||||
['b1', 'b2', 'b3']
|
||||
The original iterable will be advanced and its items will be cached until
|
||||
they are used by the child iterables. This may require significant storage.
|
||||
By default, attempting to select a bucket to which no items belong will
|
||||
exhaust the iterable and cache all values.
|
||||
If you specify a *validator* function, selected buckets will instead be
|
||||
checked against it.
|
||||
>>> from itertools import count
|
||||
>>> it = count(1, 2) # Infinite sequence of odd numbers
|
||||
>>> key = lambda x: x % 10 # Bucket by last digit
|
||||
>>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only
|
||||
>>> s = iter_bucket(it, key=key, validator=validator)
|
||||
>>> 2 in s
|
||||
False
|
||||
>>> list(s[2])
|
||||
[]
|
||||
"""
|
||||
|
||||
def __init__(self, iterable, key, validator=None):
|
||||
self._it = iter(iterable)
|
||||
self._key = key
|
||||
self._cache = defaultdict(deque)
|
||||
self._validator = validator or (lambda x: True)
|
||||
|
||||
def __contains__(self, value):
|
||||
if not self._validator(value):
|
||||
return False
|
||||
|
||||
try:
|
||||
item = next(self[value])
|
||||
except StopIteration:
|
||||
return False
|
||||
else:
|
||||
self._cache[value].appendleft(item)
|
||||
|
||||
return True
|
||||
|
||||
def _get_values(self, value):
|
||||
"""
|
||||
Helper to yield items from the parent iterator that match *value*.
|
||||
Items that don't match are stored in the local cache as they
|
||||
are encountered.
|
||||
"""
|
||||
while True:
|
||||
# If we've cached some items that match the target value, emit
|
||||
# the first one and evict it from the cache.
|
||||
if self._cache[value]:
|
||||
yield self._cache[value].popleft()
|
||||
# Otherwise we need to advance the parent iterator to search for
|
||||
# a matching item, caching the rest.
|
||||
else:
|
||||
while True:
|
||||
try:
|
||||
item = next(self._it)
|
||||
except StopIteration:
|
||||
return
|
||||
item_value = self._key(item)
|
||||
if item_value == value:
|
||||
yield item
|
||||
break
|
||||
elif self._validator(item_value):
|
||||
self._cache[item_value].append(item)
|
||||
|
||||
def __iter__(self):
|
||||
for item in self._it:
|
||||
item_value = self._key(item)
|
||||
if self._validator(item_value):
|
||||
self._cache[item_value].append(item)
|
||||
|
||||
yield from self._cache.keys()
|
||||
|
||||
def __getitem__(self, value):
|
||||
if not self._validator(value):
|
||||
return iter(())
|
||||
|
||||
return self._get_values(value)
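
A minimal sketch of the flattening helpers above (illustrative only; the module path dbt.semantic.object_utils is assumed from the file location):

from dbt.semantic.object_utils import flatten_nested_sequence, flatten_and_dedupe

assert flatten_nested_sequence(((1, 2), (3, 4))) == (1, 2, 3, 4)
assert flatten_and_dedupe(((1, 2), (2, 3))) == (1, 2, 3)  # order preserved, duplicates dropped
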
111
core/dbt/semantic/references.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ElementReference(dbtClassMixin):
|
||||
"""Used when we need to refer to a dimension, measure, identifier, but other attributes are unknown."""
|
||||
|
||||
name: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class LinkableElementReference(ElementReference):
|
||||
"""Used when we need to refer to a dimension or identifier, but other attributes are unknown."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MeasureReference(ElementReference):
|
||||
"""Used when we need to refer to a measure (separate from LinkableElementReference because measures aren't linkable"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DimensionReference(LinkableElementReference): # noqa: D
|
||||
pass
|
||||
|
||||
@property
|
||||
def time_dimension_reference(self) -> TimeDimensionReference: # noqa: D
|
||||
return TimeDimensionReference(name=self.name)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TimeDimensionReference(DimensionReference): # noqa: D
|
||||
pass
|
||||
|
||||
def dimension_reference(self) -> DimensionReference: # noqa: D
|
||||
return DimensionReference(name=self.name)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class IdentifierReference(LinkableElementReference): # noqa: D
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CompositeSubIdentifierReference(ElementReference): # noqa: D
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MetricReference(ElementReference): # noqa: D
|
||||
pass
|
||||
|
||||
|
||||
class ModelReference(dbtClassMixin):
|
||||
"""A reference to something in the model.
|
||||
For example, a measure instance could have a defined_from field that has a model reference to the measure / data
|
||||
source that it is supposed to reference. Added for exploratory purposes, so whether this is needed is TBD.
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EntityReference(ModelReference):
|
||||
"""A reference to a entity definition in the model."""
|
||||
|
||||
entity_name: str
|
||||
|
||||
def __hash__(self) -> int: # noqa: D
|
||||
return hash(self.entity_name)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EntityElementReference(ModelReference):
|
||||
"""A reference to an element definition in a data source definition in the model.
|
||||
TODO: Fields should be *Reference objects.
|
||||
"""
|
||||
|
||||
entity_name: str
|
||||
name: str
|
||||
|
||||
@staticmethod
|
||||
def create_from_references( # noqa: D
|
||||
entity_reference: EntityReference, element_reference: ElementReference
|
||||
) -> EntityElementReference:
|
||||
return EntityElementReference(
|
||||
entity_name=entity_reference.entity_name,
|
||||
name=element_reference.name,
|
||||
)
|
||||
|
||||
@property
|
||||
def entity_reference(self) -> EntityReference: # noqa: D
|
||||
return EntityReference(self.entity_name)
|
||||
|
||||
def is_from(self, ref: EntityReference) -> bool:
|
||||
"""Returns true if this reference is from the same data source as the supplied reference."""
|
||||
return self.entity_name == ref.entity_name
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MetricModelReference(ModelReference):
|
||||
"""A reference to a metric definition in the model."""
|
||||
|
||||
metric_name: str
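
A minimal sketch of composing the reference classes above (illustrative only; the names "transactions" and "transaction_amount" are made up, and the module path dbt.semantic.references is assumed):

from dbt.semantic.references import ElementReference, EntityReference, EntityElementReference

entity_ref = EntityReference(entity_name="transactions")
element_ref = ElementReference(name="transaction_amount")
combined = EntityElementReference.create_from_references(entity_ref, element_ref)
assert combined.is_from(entity_ref)
assert combined.entity_reference == entity_ref
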
131
core/dbt/semantic/sql_bind_parameters.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional, Tuple, Mapping
|
||||
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
from dbt.semantic.object_utils import SqlColumnType, assert_exactly_one_arg_set
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SqlBindParameterValue(dbtClassMixin):
|
||||
"""SqlColumnType has issues with serialization, so using this union-style type."""
|
||||
|
||||
str_value: Optional[str] = None
|
||||
int_value: Optional[int] = None
|
||||
float_value: Optional[float] = None
|
||||
datetime_value: Optional[datetime.datetime] = None
|
||||
date_value: Optional[datetime.date] = None
|
||||
bool_value: Optional[bool] = None
|
||||
|
||||
def __post_init__(self) -> None: # noqa: D
|
||||
assert_exactly_one_arg_set(
|
||||
str_value=self.str_value,
|
||||
int_value=self.int_value,
|
||||
float_value=self.float_value,
|
||||
datetime_value=self.datetime_value,
|
||||
date_value=self.date_value,
|
||||
bool_value=self.bool_value,
|
||||
)
|
||||
|
||||
@property
|
||||
def union_value(self) -> SqlColumnType: # noqa: D
|
||||
if self.str_value is not None:
|
||||
return self.str_value
|
||||
elif self.int_value is not None:
|
||||
return self.int_value
|
||||
elif self.float_value is not None:
|
||||
return self.float_value
|
||||
elif self.datetime_value is not None:
|
||||
return self.datetime_value
|
||||
elif self.date_value is not None:
|
||||
return self.date_value
|
||||
elif self.bool_value is not None:
|
||||
return self.bool_value
|
||||
raise RuntimeError("No values are set - this should have been prevented by the post init")
|
||||
|
||||
@staticmethod
|
||||
def create_from_sql_column_type(value: SqlColumnType) -> SqlBindParameterValue:
|
||||
"""Convenience method for creating these values. Frowning on the use of isinstance()."""
|
||||
|
||||
if isinstance(value, str):
|
||||
return SqlBindParameterValue(str_value=value)
|
||||
elif isinstance(value, int):
|
||||
return SqlBindParameterValue(int_value=value)
|
||||
elif isinstance(value, float):
|
||||
return SqlBindParameterValue(float_value=value)
|
||||
elif isinstance(value, datetime.datetime):
|
||||
return SqlBindParameterValue(datetime_value=value)
|
||||
elif isinstance(value, datetime.date):
|
||||
return SqlBindParameterValue(date_value=value)
|
||||
elif isinstance(value, bool):
|
||||
return SqlBindParameterValue(bool_value=value)
|
||||
|
||||
raise RuntimeError(f"Unhandled type: {type(value)}")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SqlBindParameter(dbtClassMixin): # noqa: D
|
||||
key: str
|
||||
value: SqlBindParameterValue
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SqlBindParameters(dbtClassMixin):
|
||||
"""Helps to build execution parameters during SQL query rendering.
|
||||
These can be used as per https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql
|
||||
"""
|
||||
|
||||
# Using a tuple for immutability, since dicts are mutable.
|
||||
param_items: Tuple[SqlBindParameter, ...] = ()
|
||||
|
||||
def combine(self, additional_params: SqlBindParameters) -> SqlBindParameters:
|
||||
"""Create a new set of bind parameters that includes parameters from this and additional_params"""
|
||||
if len(self.param_items) == 0:
|
||||
return additional_params
|
||||
|
||||
if len(additional_params.param_items) == 0:
|
||||
return self
|
||||
|
||||
self_dict = {item.key: item.value for item in self.param_items}
|
||||
other_dict = {item.key: item.value for item in additional_params.param_items}
|
||||
|
||||
for key, value in other_dict.items():
|
||||
if key in self_dict and self_dict[key] != value:
|
||||
raise RuntimeError(
|
||||
f"Conflict with key {key} in combining parameters. "
|
||||
f"Existing params: {self_dict} Additional params: {other_dict}"
|
||||
)
|
||||
new_items = list(self.param_items)
|
||||
included_keys = set(item.key for item in new_items)
|
||||
for item in additional_params.param_items:
|
||||
if item.key in included_keys:
|
||||
continue
|
||||
new_items.append(item)
|
||||
included_keys.add(item.key)
|
||||
|
||||
return SqlBindParameters(param_items=tuple(new_items))
|
||||
|
||||
@property
|
||||
def param_dict(self) -> OrderedDict[str, SqlColumnType]:
|
||||
"""Useful for passing into SQLAlchemy / DB-API methods."""
|
||||
param_dict: OrderedDict[str, SqlColumnType] = OrderedDict()
|
||||
for item in self.param_items:
|
||||
param_dict[item.key] = item.value.union_value
|
||||
return param_dict
|
||||
|
||||
@staticmethod
|
||||
def create_from_dict(param_dict: Mapping[str, SqlColumnType]) -> SqlBindParameters: # noqa: D
|
||||
return SqlBindParameters(
|
||||
tuple(
|
||||
SqlBindParameter(
|
||||
key=key, value=SqlBindParameterValue.create_from_sql_column_type(value)
|
||||
)
|
||||
for key, value in param_dict.items()
|
||||
)
|
||||
)
|
||||
|
||||
def __eq__(self, other: Any) -> bool: # type: ignore # noqa: D
|
||||
return isinstance(other, SqlBindParameters) and self.param_dict == other.param_dict
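
A minimal sketch of building and merging SqlBindParameters (illustrative only; the module path dbt.semantic.sql_bind_parameters is assumed):

from dbt.semantic.sql_bind_parameters import SqlBindParameters

base = SqlBindParameters.create_from_dict({"country": "vanuatu"})
extra = SqlBindParameters.create_from_dict({"ds": "2020-01-02"})
combined = base.combine(extra)
assert combined.param_dict == {"country": "vanuatu", "ds": "2020-01-02"}
# combining two parameter sets that disagree on a key raises a RuntimeError
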
219
core/dbt/semantic/time.py
Normal file
@@ -0,0 +1,219 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from dbt.dataclass_schema import StrEnum
|
||||
|
||||
from dbt.semantic.object_utils import assert_values_exhausted
|
||||
|
||||
|
||||
class TimeGranularity(StrEnum):
|
||||
"""For time dimensions, the smallest possible difference between two time values.
|
||||
Needed for calculating adjacency when merging 2 different time ranges.
|
||||
"""
|
||||
|
||||
# Names are used in parameters to DATE_TRUNC, so don't change them.
|
||||
# Values are used to convert user supplied strings to enums.
|
||||
DAY = "day"
|
||||
WEEK = "week"
|
||||
MONTH = "month"
|
||||
QUARTER = "quarter"
|
||||
YEAR = "year"
|
||||
|
||||
def to_int(self) -> int:
|
||||
"""Convert to an int so that the size of the granularity can be easily compared."""
|
||||
if self is TimeGranularity.DAY:
|
||||
return 10
|
||||
elif self is TimeGranularity.WEEK:
|
||||
return 11
|
||||
elif self is TimeGranularity.MONTH:
|
||||
return 12
|
||||
elif self is TimeGranularity.QUARTER:
|
||||
return 13
|
||||
elif self is TimeGranularity.YEAR:
|
||||
return 14
|
||||
else:
|
||||
assert_values_exhausted(self)
|
||||
|
||||
def is_smaller_than(self, other: "TimeGranularity") -> bool: # noqa: D
|
||||
return self.to_int() < other.to_int()
|
||||
|
||||
def is_smaller_than_or_equal(self, other: "TimeGranularity") -> bool: # noqa: D
|
||||
return self.to_int() <= other.to_int()
|
||||
|
||||
# @property
|
||||
# def offset_period(self) -> pd.offsets.DateOffset:
|
||||
# """Offset object to use for adjusting by one granularity period."""
|
||||
# # The type checker is throwing errors for some of those arguments, but they are valid.
|
||||
# if self is TimeGranularity.DAY:
|
||||
# return pd.offsets.DateOffset(days=1) # type: ignore
|
||||
# elif self is TimeGranularity.WEEK:
|
||||
# return pd.offsets.DateOffset(weeks=1) # type: ignore
|
||||
# elif self is TimeGranularity.MONTH:
|
||||
# return pd.offsets.DateOffset(months=1)
|
||||
# elif self is TimeGranularity.QUARTER:
|
||||
# return pd.offsets.DateOffset(months=3)
|
||||
# elif self is TimeGranularity.YEAR:
|
||||
# return pd.offsets.DateOffset(years=1) # type: ignore
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
@property
|
||||
def format_with_first_or_last(self) -> bool:
|
||||
"""Indicates that this can only be calculated if query results display the first or last date of the period."""
|
||||
return self in [TimeGranularity.MONTH, TimeGranularity.QUARTER, TimeGranularity.YEAR]
|
||||
|
||||
# def is_period_start(self, date: Union[pd.Timestamp, date]) -> bool: # noqa: D
|
||||
# pd_date = pd.Timestamp(date)
|
||||
|
||||
# if self is TimeGranularity.DAY:
|
||||
# return True
|
||||
# elif self is TimeGranularity.WEEK:
|
||||
# return ISOWeekDay.from_pandas_timestamp(pd_date).is_week_start
|
||||
# elif self is TimeGranularity.MONTH:
|
||||
# return pd_date.is_month_start
|
||||
# elif self is TimeGranularity.QUARTER:
|
||||
# return pd_date.is_quarter_start
|
||||
# elif self is TimeGranularity.YEAR:
|
||||
# return pd_date.is_year_start
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
# def is_period_end(self, date: Union[pd.Timestamp, date]) -> bool: # noqa: D
|
||||
# pd_date = pd.Timestamp(date)
|
||||
|
||||
# if self is TimeGranularity.DAY:
|
||||
# return True
|
||||
# elif self is TimeGranularity.WEEK:
|
||||
# return ISOWeekDay.from_pandas_timestamp(pd_date).is_week_end
|
||||
# elif self is TimeGranularity.MONTH:
|
||||
# return pd_date.is_month_end
|
||||
# elif self is TimeGranularity.QUARTER:
|
||||
# return pd_date.is_quarter_end
|
||||
# elif self is TimeGranularity.YEAR:
|
||||
# return pd_date.is_year_end
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
# @property
|
||||
# def period_begin_offset( # noqa: D
|
||||
# self,
|
||||
# ) -> Union[
|
||||
# pd.offsets.MonthBegin, pd.offsets.QuarterBegin, pd.offsets.Week, pd.offsets.YearBegin
|
||||
# ]:
|
||||
# if self is TimeGranularity.DAY:
|
||||
# raise ValueError(f"Can't get period start offset for TimeGranularity.{self.name}.")
|
||||
# elif self is TimeGranularity.WEEK:
|
||||
# return pd.offsets.Week(weekday=ISOWeekDay.MONDAY.pandas_value)
|
||||
# elif self is TimeGranularity.MONTH:
|
||||
# return pd.offsets.MonthBegin()
|
||||
# elif self is TimeGranularity.QUARTER:
|
||||
# return pd.offsets.QuarterBegin(startingMonth=1)
|
||||
# elif self is TimeGranularity.YEAR:
|
||||
# return pd.offsets.YearBegin()
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
# @property
|
||||
# def period_end_offset( # noqa: D
|
||||
# self,
|
||||
# ) -> Union[pd.offsets.MonthEnd, pd.offsets.QuarterEnd, pd.offsets.Week, pd.offsets.YearEnd]:
|
||||
# if self is TimeGranularity.DAY:
|
||||
# raise ValueError(f"Can't get period end offset for TimeGranularity.{self.name}.")
|
||||
# elif self == TimeGranularity.WEEK:
|
||||
# return pd.offsets.Week(weekday=ISOWeekDay.SUNDAY.pandas_value)
|
||||
# elif self is TimeGranularity.MONTH:
|
||||
# return pd.offsets.MonthEnd()
|
||||
# elif self is TimeGranularity.QUARTER:
|
||||
# return pd.offsets.QuarterEnd(startingMonth=3)
|
||||
# elif self is TimeGranularity.YEAR:
|
||||
# return pd.offsets.YearEnd()
|
||||
# else:
|
||||
# assert_values_exhausted(self)
|
||||
|
||||
# def adjust_to_start_of_period(
|
||||
# self, date_to_adjust: pd.Timestamp, rollback: bool = True
|
||||
# ) -> pd.Timestamp:
|
||||
# """Adjust to start of period if not at start already."""
|
||||
# if rollback:
|
||||
# return self.period_begin_offset.rollback(date_to_adjust)
|
||||
# else:
|
||||
# return self.period_begin_offset.rollforward(date_to_adjust)
|
||||
|
||||
# def adjust_to_end_of_period(
|
||||
# self, date_to_adjust: pd.Timestamp, rollforward: bool = True
|
||||
# ) -> pd.Timestamp:
|
||||
# """Adjust to end of period if not at end already."""
|
||||
# if rollforward:
|
||||
# return self.period_end_offset.rollforward(date_to_adjust)
|
||||
# else:
|
||||
# return self.period_end_offset.rollback(date_to_adjust)
|
||||
|
||||
# def match_start_or_end_of_period(
|
||||
# self, date_to_match: pd.Timestamp, date_to_adjust: pd.Timestamp
|
||||
# ) -> pd.Timestamp:
|
||||
# """Adjust date_to_adjust to be start or end of period based on if date_to_match is at start or end of period."""
|
||||
# if self.is_period_start(date_to_match):
|
||||
# return self.adjust_to_start_of_period(date_to_adjust)
|
||||
# elif self.is_period_end(date_to_match):
|
||||
# return self.adjust_to_end_of_period(date_to_adjust)
|
||||
# else:
|
||||
# raise ValueError(
|
||||
# f"Expected `date_to_match` to fall at the start or end of the granularity period. Got '{date_to_match}' for granularity {self}."
|
||||
# )
|
||||
|
||||
def __lt__(self, other: Any) -> bool: # type: ignore [misc] # noqa: D
|
||||
if not isinstance(other, TimeGranularity):
|
||||
return NotImplemented
|
||||
return self.to_int() < other.to_int()
|
||||
|
||||
def __hash__(self) -> int: # noqa: D
|
||||
return self.to_int()
|
||||
|
||||
def __repr__(self) -> str: # noqa: D
|
||||
return f"{self.__class__.__name__}.{self.name}"
|
||||
|
||||
|
||||
class ISOWeekDay(StrEnum):
|
||||
"""Day of week values per ISO standard"""
|
||||
|
||||
MONDAY = 1
|
||||
TUESDAY = 2
|
||||
WEDNESDAY = 3
|
||||
THURSDAY = 4
|
||||
FRIDAY = 5
|
||||
SATURDAY = 6
|
||||
SUNDAY = 7
|
||||
|
||||
# @staticmethod
|
||||
# def from_pandas_timestamp(timestamp: pd.Timestamp) -> ISOWeekDay:
|
||||
# """Factory for streamlining conversion from a Pandas Timestamp to an ISOWeekDay"""
|
||||
# return ISOWeekDay(timestamp.isoweekday())
|
||||
|
||||
@property
|
||||
def is_week_start(self) -> bool:
|
||||
"""Return comparison of instance value against ISO standard start of week (Monday)"""
|
||||
return self is ISOWeekDay.MONDAY
|
||||
|
||||
@property
|
||||
def is_week_end(self) -> bool:
|
||||
"""Return comparison of instance value against ISO standard end of week (Sunday)"""
|
||||
return self is ISOWeekDay.SUNDAY
|
||||
|
||||
@property
|
||||
def pandas_value(self) -> int:
|
||||
"""Returns the pandas int value representation of the ISOWeekDay"""
|
||||
return self.value - 1
|
||||
|
||||
|
||||
def string_to_time_granularity(s: str) -> TimeGranularity: # noqa: D
|
||||
values = {item.value: item for item in TimeGranularity}
|
||||
return values[s]
|
||||
|
||||
|
||||
SUPPORTED_GRANULARITIES = [
|
||||
TimeGranularity.DAY,
|
||||
TimeGranularity.WEEK,
|
||||
TimeGranularity.MONTH,
|
||||
TimeGranularity.QUARTER,
|
||||
TimeGranularity.YEAR,
|
||||
]
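
A minimal sketch of TimeGranularity comparisons (illustrative only; the module path dbt.semantic.time is assumed):

from dbt.semantic.time import TimeGranularity, string_to_time_granularity

month = string_to_time_granularity("month")
assert month is TimeGranularity.MONTH
assert TimeGranularity.DAY.is_smaller_than(month)  # to_int(): 10 < 12
assert min(TimeGranularity.YEAR, TimeGranularity.DAY) is TimeGranularity.DAY  # __lt__ orders by to_int()
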
@@ -0,0 +1,21 @@
|
||||
from abc import ABC
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.semantic.aggregation_properties import AggregationType
|
||||
|
||||
|
||||
class BooleanMeasureAggregation(ABC):
|
||||
"""Converts the expression used in boolean measures so that it can be aggregated."""
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity: # noqa: D
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
if measure.agg == AggregationType.SUM_BOOLEAN:
|
||||
if measure.expr:
|
||||
measure.expr = f"case when {measure.expr} then 1 else 0 end"
|
||||
else:
|
||||
measure.expr = f"case when {measure.name} then 1 else 0 end"
|
||||
|
||||
measure.agg = AggregationType.SUM
|
||||
|
||||
return entity
|
||||
@@ -0,0 +1,28 @@
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from abc import ABC
|
||||
|
||||
|
||||
class CompositeIdentifierExpressionRule(ABC):
|
||||
"""Transform composite sub-identifiers for convenience.
|
||||
If a sub-identifier has no expression, check if an identifier exists
|
||||
with the same name and use that identifier's expression if it has one.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity: # noqa: D
|
||||
for identifier in entity.identifiers:
|
||||
if identifier.identifiers is None or len(identifier.identifiers) == 0:
|
||||
continue
|
||||
|
||||
for sub_identifier in identifier.identifiers:
|
||||
if sub_identifier.name or sub_identifier.expr:
|
||||
continue
|
||||
|
||||
# check whether a top-level identifier with the same name provides an expression
for top_level_identifier in entity.identifiers:
if sub_identifier.ref == top_level_identifier.name:
sub_identifier.ref = None
sub_identifier.name = top_level_identifier.name
sub_identifier.expr = top_level_identifier.expr
break
|
||||
|
||||
return entity
|
||||
@@ -0,0 +1,24 @@
|
||||
from abc import ABC
|
||||
from dbt.semantic.aggregation_properties import AggregationType
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
|
||||
ONE = "1"
|
||||
|
||||
|
||||
class ConvertCountToSum(ABC):
|
||||
"""Converts any COUNT measures to SUM equivalent."""
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity: # noqa: D
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
if measure.agg == AggregationType.COUNT:
|
||||
# NOTE: Removed if expr none error because dbt metric design encourages count on
|
||||
# columns, not requiring an expression. This makes it easier for users.
|
||||
if measure.expr is None:
|
||||
measure.expr = f"case when {measure.name} is not null then 1 else 0 end"
|
||||
elif measure.expr != ONE:
|
||||
# Just leave it as SUM(1) if we want to count all
|
||||
measure.expr = f"case when {measure.expr} is not null then 1 else 0 end"
|
||||
measure.agg = AggregationType.SUM
|
||||
return entity
|
||||
@@ -0,0 +1,39 @@
|
||||
from abc import ABC
|
||||
from dbt.semantic.aggregation_properties import AggregationType
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.measures import MeasureAggregationParameters
|
||||
from dbt.exceptions import DbtSemanticValidationError
|
||||
|
||||
MEDIAN_PERCENTILE = 0.5
|
||||
|
||||
|
||||
class ConvertMedianToPercentile(ABC):
|
||||
"""Converts any MEDIAN measures to percentile equivalent."""
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity: # noqa: D
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
if measure.agg == AggregationType.MEDIAN:
|
||||
measure.agg = AggregationType.PERCENTILE
|
||||
|
||||
if not measure.agg_params:
|
||||
measure.agg_params = MeasureAggregationParameters()
|
||||
else:
|
||||
if (
|
||||
measure.agg_params.percentile is not None
|
||||
and measure.agg_params.percentile != 0.5
|
||||
):
|
||||
raise DbtSemanticValidationError(
|
||||
f"Measure '{measure.name}' uses a MEDIAN aggregation, while percentile is set to "
|
||||
f"'{measure.agg_params.percentile}', a conflicting value. Please remove the parameter "
|
||||
"or set to '0.5'."
|
||||
)
|
||||
if measure.agg_params.use_discrete_percentile:
|
||||
raise DbtSemanticValidationError(
|
||||
f"Measure '{measure.name}' uses a MEDIAN aggregation, while use_discrete_percentile"
|
||||
f"is set to true. Please remove the parameter or set to False."
|
||||
)
|
||||
measure.agg_params.percentile = MEDIAN_PERCENTILE
|
||||
# let's not set use_approximate_percentile to be false due to valid performance reasons
|
||||
return entity
|
||||
@@ -0,0 +1,21 @@
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from abc import ABC
|
||||
|
||||
|
||||
class LowerCaseNames(ABC):
|
||||
"""Lowercases the names of both top level objects and entity elements"""
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity:
|
||||
"""Lowercases the names of data source elements."""
|
||||
entity.name = entity.name.lower()
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
measure.name = measure.name.lower()
|
||||
if entity.identifiers:
|
||||
for identifier in entity.identifiers:
|
||||
identifier.name = identifier.name.lower()
|
||||
if entity.dimensions:
|
||||
for dimension in entity.dimensions:
|
||||
dimension.name = dimension.name.lower()
|
||||
return entity
|
||||
@@ -0,0 +1,35 @@
|
||||
from abc import ABC
|
||||
from typing import Optional
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.dimensions import DimensionType
|
||||
from dbt.semantic.references import TimeDimensionReference
|
||||
|
||||
|
||||
class SetMeasureAggregationTimeDimension(ABC):
|
||||
"""Sets the aggregation time dimension for measures to the primary time dimension if not defined."""
|
||||
|
||||
@staticmethod
|
||||
def _find_primary_time_dimension(entity: Entity) -> Optional[TimeDimensionReference]:
|
||||
for dimension in entity.dimensions:
|
||||
if (
|
||||
dimension.type == DimensionType.TIME
|
||||
and dimension.type_params
|
||||
and dimension.type_params.is_primary
|
||||
):
|
||||
return dimension.time_dimension_reference
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _transform_entity(entity: Entity) -> Entity: # noqa: D
|
||||
|
||||
primary_time_dimension_reference = (
|
||||
SetMeasureAggregationTimeDimension._find_primary_time_dimension(entity=entity)
|
||||
)
|
||||
|
||||
if primary_time_dimension_reference:
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
if not measure.agg_time_dimension:
|
||||
measure.agg_time_dimension = primary_time_dimension_reference.name
|
||||
|
||||
return entity
|
||||
@@ -0,0 +1,98 @@
|
||||
from dbt.contracts.graph.metrics import MetricType, MetricInputMeasure, MetricTypeParams
|
||||
from dbt.node_types import NodeType
|
||||
from dbt.contracts.graph.nodes import Metric, Entity
|
||||
from dbt.clients.jinja import get_rendered
|
||||
from abc import ABC
|
||||
from typing import List
|
||||
from dbt.context.providers import (
|
||||
generate_parse_metrics,
|
||||
)
|
||||
|
||||
|
||||
class ProxyMeasure(ABC):
|
||||
"""All the functionality needed to convert measures to metrics"""
|
||||
|
||||
@staticmethod
|
||||
def _create_proxy_metrics(parser, parsed_entity: Entity, path: str, fqn: List):
|
||||
if parsed_entity.measures:
|
||||
for measure in parsed_entity.measures:
|
||||
if measure.create_metric:
|
||||
add_metric = True
|
||||
package_name = parser.project.project_name
|
||||
unique_id = f"{NodeType.Metric}.{package_name}.{measure.name}"
|
||||
original_file_path = parser.yaml.path.original_file_path
|
||||
fqn[2] = measure.name
|
||||
|
||||
# TODO: Figure out new location in validation
|
||||
# if parsed_entity.metrics:
|
||||
# breakpoint()
|
||||
# for metric in parsed_entity.metrics:
|
||||
# if metric == measure.name:
|
||||
# if metric.type != MetricType.MEASURE_PROXY:
|
||||
# raise DbtValidationError(
|
||||
# f"Cannot have metric with the same name as a measure ({measure.name}) that is not a "
|
||||
# f"proxy for that measure"
|
||||
# )
|
||||
# add_metric = False
|
||||
|
||||
config = parser._generate_proxy_metric_config(
|
||||
target=measure,
|
||||
fqn=fqn,
|
||||
package_name=package_name,
|
||||
rendered=True,
|
||||
)
|
||||
|
||||
config = config.finalize_and_validate()
|
||||
|
||||
unrendered_config = parser._generate_proxy_metric_config(
|
||||
target=measure,
|
||||
fqn=fqn,
|
||||
package_name=package_name,
|
||||
rendered=False,
|
||||
)
|
||||
|
||||
if measure.expr:
|
||||
measure_expr = measure.expr
|
||||
else:
|
||||
measure_expr = measure.name
|
||||
|
||||
if add_metric:
|
||||
proxy_metric = Metric(
|
||||
resource_type=NodeType.Metric,
|
||||
package_name=package_name,
|
||||
path=path,
|
||||
original_file_path=original_file_path,
|
||||
unique_id=unique_id,
|
||||
fqn=fqn,
|
||||
name=measure.name,
|
||||
constraint=None,
|
||||
entity="entity('" + parsed_entity.name + "')",
|
||||
description=measure.description,
|
||||
type=MetricType.MEASURE_PROXY,
|
||||
type_params=MetricTypeParams(
|
||||
measure=MetricInputMeasure(name=measure.name),
|
||||
expr=measure_expr,
|
||||
),
|
||||
meta=measure.meta,
|
||||
tags=measure.tags,
|
||||
config=config,
|
||||
unrendered_config=unrendered_config,
|
||||
)
|
||||
|
||||
proxy_ctx = generate_parse_metrics(
|
||||
proxy_metric,
|
||||
parser.root_project,
|
||||
parser.schema_parser.manifest,
|
||||
package_name,
|
||||
)
|
||||
|
||||
if proxy_metric.entity is not None:
|
||||
entity_ref = "{{ " + proxy_metric.entity + " }}"
|
||||
get_rendered(entity_ref, proxy_ctx, proxy_metric)
|
||||
|
||||
if proxy_metric.config.enabled:
|
||||
parser.manifest.add_metric(parser.yaml.file, proxy_metric)
|
||||
else:
|
||||
parser.manifest.add_disabled(parser.yaml.file, proxy_metric)
|
||||
|
||||
return parser
|
||||
@@ -0,0 +1,42 @@
|
||||
from typing import Set, List
|
||||
from abc import ABC
|
||||
from dbt.exceptions import DbtSemanticValidationError
|
||||
from dbt.contracts.graph.metrics import MetricType, MetricInputMeasure
|
||||
from dbt.contracts.graph.nodes import Metric
|
||||
|
||||
|
||||
class AddInputMetricMeasures(ABC):
|
||||
"""Add all measures corresponding to the input metrics of the derived metric."""
|
||||
|
||||
@staticmethod
|
||||
def _get_measures_for_metric(
|
||||
metric_name: str, metrics: List[Metric]
|
||||
) -> Set[MetricInputMeasure]:
|
||||
"""Returns a unique set of input measures for a given metric."""
|
||||
measures = set()
|
||||
metrics_generator = (metric for metric in metrics if metric.name == metric_name)
|
||||
matched_metric = next(iter(metrics_generator), None)
|
||||
if matched_metric:
|
||||
if matched_metric.type == MetricType.DERIVED:
|
||||
for input_metric in matched_metric.input_metrics:
|
||||
measures.update(
|
||||
AddInputMetricMeasures._get_measures_for_metric(input_metric.name, metrics)
|
||||
)
|
||||
else:
|
||||
measures.update(set(matched_metric.input_measures))
|
||||
else:
|
||||
raise DbtSemanticValidationError(
|
||||
f"Metric '{metric_name}' is not configured as a metric in the model."
|
||||
)
|
||||
return measures
|
||||
|
||||
@staticmethod
|
||||
def add_input_metrics(metric: Metric, metrics: List[Metric]) -> Metric: # noqa: D
|
||||
if metric.type == MetricType.DERIVED:
|
||||
measures = AddInputMetricMeasures._get_measures_for_metric(metric.name, metrics)
|
||||
if metric.type_params.measures is not None:
raise DbtSemanticValidationError(
f"Metric '{metric.name}' is derived, so it cannot have measures predefined in config."
|
||||
)
|
||||
metric.type_params.measures = list(measures)
|
||||
return metric
|
||||
@@ -0,0 +1,98 @@
|
||||
from typing import Union, List
|
||||
from abc import ABC
|
||||
from dbt.contracts.graph.metrics import (
|
||||
UnparsedMetricInputMeasure,
|
||||
MetricInputMeasure,
|
||||
MetricTimeWindow,
|
||||
UnparsedMetricInput,
|
||||
MetricInput,
|
||||
UnparsedMetricTypeParams,
|
||||
MetricTypeParams,
|
||||
)
|
||||
from dbt.semantic.constraints import WhereClauseConstraint
|
||||
|
||||
|
||||
class ConvertTypeParams(ABC):
|
||||
"""All the functionality needed to convert UnparsedMetricTypeParams to MetricTypeParams"""
|
||||
|
||||
@staticmethod
|
||||
def _get_parameter(
|
||||
parameter: Union[UnparsedMetricInputMeasure, str, None]
|
||||
) -> MetricInputMeasure:
|
||||
if isinstance(parameter, str):
|
||||
return MetricInputMeasure(name=parameter)
|
||||
elif isinstance(parameter, UnparsedMetricInputMeasure):
|
||||
return MetricInputMeasure(
|
||||
name=parameter.name,
|
||||
constraint=WhereClauseConstraint.parse(parameter.constraint),
|
||||
alias=parameter.alias,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _get_parameters(
|
||||
parameters: List[Union[UnparsedMetricInputMeasure, str]]
|
||||
) -> List[MetricInputMeasure]:
|
||||
parameters_list = []
|
||||
if parameters:
|
||||
for parameter in parameters:
|
||||
if isinstance(parameter, str):
|
||||
parameters_list.append(MetricInputMeasure(name=parameter))
|
||||
elif isinstance(parameter, UnparsedMetricInputMeasure):
|
||||
parameters_list.append(
|
||||
MetricInputMeasure(
|
||||
name=parameter.name,
|
||||
constraint=WhereClauseConstraint.parse(parameter.constraint),
|
||||
alias=parameter.alias,
|
||||
)
|
||||
)
|
||||
return parameters_list
|
||||
else:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _get_window_parameter(parameter: Union[MetricTimeWindow, str, None]):
|
||||
if isinstance(parameter, str):
|
||||
return MetricTimeWindow.parse(window=parameter)
|
||||
elif isinstance(parameter, MetricTimeWindow):
|
||||
return parameter
|
||||
else:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _get_metric_parameters(
|
||||
parameters: List[Union[UnparsedMetricInput, str]]
|
||||
) -> List[MetricInput]:
|
||||
parameters_list = []
|
||||
if parameters:
|
||||
for parameter in parameters:
|
||||
if isinstance(parameter, str):
|
||||
parameters_list.append(MetricInput(name=parameter))
|
||||
elif isinstance(parameter, UnparsedMetricInput):
|
||||
parameters_list.append(
|
||||
MetricInput(
|
||||
name=parameter.name,
|
||||
constraint=parameter.constraint,
|
||||
alias=parameter.alias,
|
||||
offset_window=parameter.offset_window,
|
||||
offset_to_grain=parameter.offset_to_grain,
|
||||
)
|
||||
)
|
||||
return parameters_list
|
||||
else:
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _get_metric_type_params(type_params: UnparsedMetricTypeParams) -> MetricTypeParams:
|
||||
|
||||
parsed_type_params = MetricTypeParams(
|
||||
measure=ConvertTypeParams._get_parameter(type_params.measure),
|
||||
measures=ConvertTypeParams._get_parameters(type_params.measures),
|
||||
numerator=ConvertTypeParams._get_parameter(type_params.numerator),
|
||||
denominator=ConvertTypeParams._get_parameter(type_params.denominator),
|
||||
expr=type_params.expr,
|
||||
window=ConvertTypeParams._get_window_parameter(type_params.window),
|
||||
grain_to_date=type_params.grain_to_date,
|
||||
metrics=ConvertTypeParams._get_metric_parameters(type_params.metrics),
|
||||
)
|
||||
|
||||
return parsed_type_params
|
||||
15
core/dbt/semantic/user_configured_model.py
Normal file
@@ -0,0 +1,15 @@
from typing import List
from dataclasses import dataclass, field
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.nodes import Entity, Metric


@dataclass
class UserConfiguredModel(dbtClassMixin):
    """Model holds all the information the SemanticLayer needs to render a query"""

    entities: List[Entity] = field(default_factory=list)
    metrics: List[Metric] = field(default_factory=list)

    def _serialize(self):
        return self.to_dict()
61
core/dbt/semantic/validations/agg_time_dimensions.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from typing import List
|
||||
|
||||
from dbt.semantic.references import EntityElementReference, TimeDimensionReference
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.dimensions import DimensionType
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationIssueType,
|
||||
ValidationError,
|
||||
)
|
||||
|
||||
|
||||
class AggregationTimeDimensionRule(ModelValidationRule):
|
||||
"""Checks that the aggregation time dimension for a measure points to a valid time dimension in the entity."""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
for entity in model.entities:
|
||||
issues.extend(AggregationTimeDimensionRule._validate_entity(entity))
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _time_dimension_in_model(
|
||||
time_dimension_reference: TimeDimensionReference, entity: Entity
|
||||
) -> bool:
|
||||
for dimension in entity.dimensions:
|
||||
if (
|
||||
dimension.type == DimensionType.TIME
|
||||
and dimension.name == time_dimension_reference.name
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for measure in entity.measures:
|
||||
measure_context = EntityElementContext(
|
||||
entity_element=EntityElementReference(entity_name=entity.name, name=measure.name),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
)
|
||||
agg_time_dimension_reference = measure.checked_agg_time_dimension
|
||||
if not AggregationTimeDimensionRule._time_dimension_in_model(
|
||||
time_dimension_reference=agg_time_dimension_reference, entity=entity
|
||||
):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=measure_context,
|
||||
message=f"In entity '{entity.name}', measure '{measure.name}' has the aggregation "
|
||||
f"time dimension set to '{agg_time_dimension_reference.name}', "
|
||||
f"which is not a valid time dimension in the entity",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
78
core/dbt/semantic/validations/common_identifiers.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from typing import Dict, List, Set
|
||||
from dbt.semantic.references import EntityElementReference, IdentifierReference
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.identifiers import Identifier
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationWarning,
|
||||
validate_safely,
|
||||
ValidationIssueType,
|
||||
)
|
||||
|
||||
|
||||
class CommonIdentifiersRule(ModelValidationRule):
|
||||
"""Checks that identifiers exist on more than one entity"""
|
||||
|
||||
@staticmethod
|
||||
def _map_entity_identifiers(entities: List[Entity]) -> Dict[IdentifierReference, Set[str]]:
|
||||
"""Generate mapping of identifier names to the set of entities where it is defined"""
|
||||
identifiers_to_entities: Dict[IdentifierReference, Set[str]] = {}
|
||||
for entity in entities or []:
|
||||
for identifier in entity.identifiers or []:
|
||||
if identifier.reference in identifiers_to_entities:
|
||||
identifiers_to_entities[identifier.reference].add(entity.name)
|
||||
else:
|
||||
identifiers_to_entities[identifier.reference] = {entity.name}
|
||||
return identifiers_to_entities
|
||||
|
||||
@staticmethod
|
||||
@validate_safely(whats_being_done="checking identifier exists on more than one entity")
|
||||
def _check_identifier(
|
||||
identifier: Identifier,
|
||||
entity: Entity,
|
||||
identifiers_to_entities: Dict[IdentifierReference, Set[str]],
|
||||
) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
# If the identifier is in the dict and the set of entities minus this entity is empty,
|
||||
# then we warn the user that their identifier will be unused in joins
|
||||
if (
|
||||
identifier.reference in identifiers_to_entities
|
||||
and len(identifiers_to_entities[identifier.reference].difference({entity.name})) == 0
|
||||
):
|
||||
issues.append(
|
||||
ValidationWarning(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=identifier.name
|
||||
),
|
||||
element_type=EntityElementType.IDENTIFIER,
|
||||
),
|
||||
message=f"Identifier `{identifier.reference.name}` "
|
||||
f"only found in one entity `{entity.name}` "
|
||||
f"which means it will be unused in joins.",
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
@validate_safely(
|
||||
whats_being_done="running model validation warning if identifiers are only one one entity"
|
||||
)
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Issues a warning for any identifier that is associated with only one entity"""
|
||||
issues = []
|
||||
|
||||
identifiers_to_entities = CommonIdentifiersRule._map_entity_identifiers(model.entities)
|
||||
for entity in model.entities or []:
|
||||
for identifier in entity.identifiers or []:
|
||||
issues += CommonIdentifiersRule._check_identifier(
|
||||
identifier=identifier,
|
||||
entity=entity,
|
||||
identifiers_to_entities=identifiers_to_entities,
|
||||
)
|
||||
|
||||
return issues
|
||||
148
core/dbt/semantic/validations/dimension_const.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from typing import Dict, List
|
||||
from dbt.semantic.references import EntityElementReference, DimensionReference
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.dimensions import Dimension, DimensionType
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
DimensionInvariants,
|
||||
ValidationIssueType,
|
||||
ValidationError,
|
||||
)
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.time import TimeGranularity
|
||||
|
||||
|
||||
class DimensionConsistencyRule(ModelValidationRule):
|
||||
"""Checks for consistent dimension properties in the entitys in a model.
|
||||
|
||||
* Dimensions with the same name should be of the same type.
|
||||
* Dimensions with the same name should be either all partitions or not.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
dimension_to_invariant: Dict[DimensionReference, DimensionInvariants] = {}
|
||||
time_dims_to_granularity: Dict[DimensionReference, TimeGranularity] = {}
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for entity in model.entities:
|
||||
issues += DimensionConsistencyRule._validate_entity(
|
||||
entity=entity,
|
||||
dimension_to_invariant=dimension_to_invariant,
|
||||
update_invariant_dict=True,
|
||||
)
|
||||
|
||||
for dimension in entity.dimensions:
|
||||
issues += DimensionConsistencyRule._validate_dimension(
|
||||
dimension=dimension,
|
||||
time_dims_to_granularity=time_dims_to_granularity,
|
||||
entity=entity,
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_dimension(
|
||||
dimension: Dimension,
|
||||
time_dims_to_granularity: Dict[DimensionReference, TimeGranularity],
|
||||
entity: Entity,
|
||||
) -> List[ValidationIssueType]:
|
||||
"""Checks that time dimensions of the same name that aren't primary have the same time granularity specifications
|
||||
|
||||
Args:
|
||||
dimension: the dimension to check
|
||||
time_dims_to_granularity: a dict from the dimension to the time granularity it should have
|
||||
entity: the associated entity. Used for generated issue messages
|
||||
"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
context = EntityElementContext(
|
||||
entity_element=EntityElementReference(entity_name=entity.name, name=dimension.name),
|
||||
element_type=EntityElementType.DIMENSION,
|
||||
)
|
||||
|
||||
if dimension.type == DimensionType.TIME:
|
||||
if dimension.reference not in time_dims_to_granularity and dimension.type_params:
|
||||
time_dims_to_granularity[
|
||||
dimension.reference
|
||||
] = dimension.type_params.time_granularity
|
||||
|
||||
# The primary time dimension can be of different time granularities, so don't check for it.
|
||||
if (
|
||||
dimension.type_params is not None
|
||||
and not dimension.type_params.is_primary
|
||||
and dimension.type_params.time_granularity
|
||||
!= time_dims_to_granularity[dimension.reference]
|
||||
):
|
||||
expected_granularity = time_dims_to_granularity[dimension.reference]
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"Time granularity must be the same for time dimensions with the same name. "
|
||||
f"Problematic dimension: {dimension.name} in entity with name: "
|
||||
f"`{entity.name}`. Expected granularity is {expected_granularity.name}.",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity(
|
||||
entity: Entity,
|
||||
dimension_to_invariant: Dict[DimensionReference, DimensionInvariants],
|
||||
update_invariant_dict: bool,
|
||||
) -> List[ValidationIssueType]:
|
||||
"""Checks that the given entity has dimensions consistent with the given invariants.
|
||||
|
||||
Args:
|
||||
entity: the entity to check
|
||||
dimension_to_invariant: a dict from the dimension name to the properties it should have
|
||||
update_invariant_dict: whether to insert an entry into the dict if the given dimension name doesn't exist.
|
||||
"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for dimension in entity.dimensions:
|
||||
dimension_invariant = dimension_to_invariant.get(dimension.reference)
|
||||
|
||||
if dimension_invariant is None:
|
||||
if update_invariant_dict:
|
||||
dimension_invariant = DimensionInvariants(
|
||||
dimension.type, dimension.is_partition or False
|
||||
)
|
||||
dimension_to_invariant[dimension.reference] = dimension_invariant
|
||||
continue
|
||||
# TODO: Can't check for unknown dimensions easily as the name follows <id>__<name> format.
|
||||
# e.g. user__created_at
|
||||
continue
|
||||
|
||||
# is_partition might not be specified in the configs, so default to False.
|
||||
is_partition = dimension.is_partition or False
|
||||
|
||||
context = EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=dimension.name
|
||||
),
|
||||
element_type=EntityElementType.DIMENSION,
|
||||
)
|
||||
|
||||
if dimension_invariant.type != dimension.type:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"In entity `{entity.name}`, type conflict for dimension `{dimension.name}` "
|
||||
f"- already in model as type `{dimension_invariant.type}` but got `{dimension.type}`",
|
||||
)
|
||||
)
|
||||
if dimension_invariant.is_partition != is_partition:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"In entity `{entity.name}, conflicting is_partition attribute for dimension "
|
||||
f"`{dimension.reference}` - already in model"
|
||||
f" with is_partition as `{dimension_invariant.is_partition}` but got "
|
||||
f"`{is_partition}``",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
79
core/dbt/semantic/validations/element_const.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from collections import defaultdict
|
||||
from typing import List, DefaultDict
|
||||
from dbt.semantic.references import EntityReference
|
||||
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationError,
|
||||
ValidationIssueType,
|
||||
)
|
||||
|
||||
|
||||
class ElementConsistencyRule(ModelValidationRule):
|
||||
"""Checks that elements in data sources with the same name are of the same element type across the model
|
||||
|
||||
This reduces the potential confusion that might arise from having an identifier named `country` and a dimension
|
||||
named `country` while allowing for things like the `user` identifier to exist in multiple data sources. Note not
|
||||
all element types allow duplicates, and there are separate validation rules for those cases. See, for example,
|
||||
the EntityMeasuresUniqueRule.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues = []
|
||||
name_to_types = ElementConsistencyRule._get_name_to_types(model=model)
|
||||
invalid_elements = {
|
||||
name: type_mapping
|
||||
for name, type_mapping in name_to_types.items()
|
||||
if len(type_mapping) > 1
|
||||
}
|
||||
|
||||
for name, type_to_context in invalid_elements.items():
|
||||
# Sort these by value to ensure consistent error messaging
|
||||
types_used = [
|
||||
EntityElementType(v) for v in sorted(k.value for k in type_to_context.keys())
|
||||
]
|
||||
value_types_used = [type.value for type in types_used]
|
||||
for element_type in types_used:
|
||||
entity_contexts = type_to_context[element_type]
|
||||
entity_names = {ctx.entity.entity_name for ctx in entity_contexts}
|
||||
entity_context = entity_contexts[0]
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=entity_context,
|
||||
message=f"In data sources {entity_names}, element `{name}` is of type "
|
||||
f"{element_type.value}, but it is used as types {value_types_used} across the model.",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _get_name_to_types(
|
||||
model: UserConfiguredModel,
|
||||
) -> DefaultDict[str, DefaultDict[EntityElementType, List[EntityContext]]]:
|
||||
"""Create a mapping of all element names in the model to types with a list of associated EntityContexts"""
|
||||
element_types: DefaultDict[
|
||||
str, DefaultDict[EntityElementType, List[EntityContext]]
|
||||
] = defaultdict(lambda: defaultdict(list))
|
||||
for entity in model.entities:
|
||||
entity_context = EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
)
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
element_types[measure.name][EntityElementType.MEASURE].append(entity_context)
|
||||
if entity.dimensions:
|
||||
for dimension in entity.dimensions:
|
||||
element_types[dimension.name][EntityElementType.DIMENSION].append(
|
||||
entity_context
|
||||
)
|
||||
if entity.identifiers:
|
||||
for identifier in entity.identifiers:
|
||||
element_types[identifier.name][EntityElementType.IDENTIFIER].append(
|
||||
entity_context
|
||||
)
|
||||
return element_types
|
||||
216
core/dbt/semantic/validations/entities.py
Normal file
@@ -0,0 +1,216 @@
|
||||
from typing import List
|
||||
from dbt.semantic.references import EntityElementReference, EntityReference
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.dimensions import DimensionType
|
||||
from dbt.contracts.graph.identifiers import IdentifierType
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityContext,
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationIssueType,
|
||||
ValidationError,
|
||||
)
|
||||
from dbt.semantic.time import SUPPORTED_GRANULARITIES
|
||||
|
||||
|
||||
class EntityTimeDimensionWarningsRule(ModelValidationRule):
|
||||
"""Checks time dimensions in entities."""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for entity in model.entities:
|
||||
issues.extend(EntityTimeDimensionWarningsRule._validate_entity(entity=entity))
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
primary_time_dimensions = []
|
||||
|
||||
for dim in entity.dimensions:
|
||||
context = EntityElementContext(
|
||||
entity_element=EntityElementReference(entity_name=entity.name, name=dim.name),
|
||||
element_type=EntityElementType.DIMENSION,
|
||||
)
|
||||
|
||||
if dim.type == DimensionType.TIME:
|
||||
if dim.type_params is None:
|
||||
continue
|
||||
elif dim.type_params.is_primary:
|
||||
primary_time_dimensions.append(dim)
|
||||
elif dim.type_params.time_granularity:
|
||||
if dim.type_params.time_granularity not in SUPPORTED_GRANULARITIES:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"Unsupported time granularity in time dimension with name: {dim.name}, "
|
||||
f"Please use {[s.value for s in SUPPORTED_GRANULARITIES]}",
|
||||
)
|
||||
)
|
||||
|
||||
# An entity must have a primary time dimension if it has
|
||||
# any measures that don't have an `agg_time_dimension` set
|
||||
if (
|
||||
len(primary_time_dimensions) == 0
|
||||
and len(entity.measures) > 0
|
||||
and any(measure.agg_time_dimension is None for measure in entity.measures)
|
||||
):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
),
|
||||
message=f"No primary time dimension in entity with name ({entity.name}). Please add one",
|
||||
)
|
||||
)
|
||||
|
||||
if len(primary_time_dimensions) > 1:
|
||||
for primary_time_dimension in primary_time_dimensions:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
),
|
||||
message=f"In entity {entity.name}, "
|
||||
f"Primary time dimension with name: {primary_time_dimension.name} "
|
||||
f"is one of many defined as primary.",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class EntityValidityWindowRule(ModelValidationRule):
|
||||
"""Checks validity windows in entitys to ensure they comply with runtime requirements"""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Checks the validity param definitions in every entity in the model"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for entity in model.entities:
|
||||
issues.extend(EntityValidityWindowRule._validate_entity(entity=entity))
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
|
||||
"""Runs assertions on entities with validity parameters set on one or more time dimensions"""
|
||||
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
validity_param_dims = [dim for dim in entity.dimensions if dim.validity_params is not None]
|
||||
|
||||
if not validity_param_dims:
|
||||
return issues
|
||||
|
||||
context = EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
)
|
||||
requirements = (
|
||||
"Data sources using dimension validity params to define a validity window must have exactly two time "
|
||||
"dimensions with validity params specified - one marked `is_start` and the other marked `is_end`."
|
||||
)
|
||||
validity_param_dimension_names = [dim.name for dim in validity_param_dims]
|
||||
start_dim_names = [
|
||||
dim.name
|
||||
for dim in validity_param_dims
|
||||
if dim.validity_params and dim.validity_params.is_start
|
||||
]
|
||||
end_dim_names = [
|
||||
dim.name
|
||||
for dim in validity_param_dims
|
||||
if dim.validity_params and dim.validity_params.is_end
|
||||
]
|
||||
num_start_dims = len(start_dim_names)
|
||||
num_end_dims = len(end_dim_names)
|
||||
|
||||
if len(validity_param_dims) == 1 and num_start_dims == 1 and num_end_dims == 1:
|
||||
# Defining a single point window, such as one might find in a daily snapshot table keyed on date,
|
||||
# is not currently supported.
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has a single validity param dimension that defines its window: "
|
||||
f"`{validity_param_dimension_names[0]}`. This is not a currently supported configuration! "
|
||||
f"{requirements} If you have one column defining a window, as in a daily snapshot table, you can "
|
||||
f"define a separate dimension and increment the time value in the `expr` field as a work-around."
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
elif len(validity_param_dims) != 2:
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has {len(validity_param_dims)} dimensions defined with validity "
|
||||
f"params. They are: {validity_param_dimension_names}. There must be either zero or two! "
|
||||
f"If you wish to define a validity window for this entity, please follow these requirements: "
|
||||
f"{requirements}"
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
elif num_start_dims != 1 or num_end_dims != 1:
|
||||
# Validity windows must define both a start and an end, and there should be exactly one
|
||||
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has two validity param dimensions defined, but does not have "
|
||||
f"exactly one each marked with is_start and is_end! Dimensions: {validity_param_dimension_names}. "
|
||||
f"is_start dimensions: {start_dim_names}. is_end dimensions: {end_dim_names}. {requirements}"
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
|
||||
primary_or_unique_identifiers = [
|
||||
identifier
|
||||
for identifier in entity.identifiers
|
||||
if identifier.type in (IdentifierType.PRIMARY, IdentifierType.UNIQUE)
|
||||
]
|
||||
if not any(
|
||||
[identifier.type is IdentifierType.NATURAL for identifier in entity.identifiers]
|
||||
):
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has validity param dimensions defined, but does not have an "
|
||||
f"identifier with type `natural` set. The natural key for this entity is what we use to "
|
||||
f"process a validity window join. Primary or unique identifiers, if any, might be suitable for "
|
||||
f"use as natural keys: ({[identifier.name for identifier in primary_or_unique_identifiers]})."
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
|
||||
if primary_or_unique_identifiers:
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has validity param dimensions defined and also has one or more "
|
||||
f"identifiers designated as `primary` or `unique`. This is not yet supported, as we do not "
|
||||
f"currently process joins against these key types for entitys with validity windows "
|
||||
f"specified."
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
|
||||
if entity.measures:
|
||||
# Temporarily block measure definitions in entities with validity windows set
|
||||
measure_names = [measure.name for measure in entity.measures]
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Data source {entity.name} has both measures and validity param dimensions defined. This "
|
||||
f"is not currently supported! Please remove either the measures or the validity params. "
|
||||
f"Measure names: {measure_names}. Validity param dimension names: "
|
||||
f"{validity_param_dimension_names}."
|
||||
),
|
||||
)
|
||||
issues.append(error)
|
||||
|
||||
return issues
|
||||
257
core/dbt/semantic/validations/identifiers.py
Normal file
@@ -0,0 +1,257 @@
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import List, MutableSet, Tuple, Sequence, DefaultDict
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.contracts.graph.identifiers import Identifier, IdentifierType, CompositeSubIdentifier
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityContext,
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationError,
|
||||
ValidationIssueType,
|
||||
ValidationWarning,
|
||||
iter_flatten,
|
||||
)
|
||||
from dbt.semantic.references import IdentifierReference, EntityElementReference, EntityReference
|
||||
|
||||
|
||||
class IdentifierConfigRule(ModelValidationRule):
|
||||
"""Checks that entity identifiers are valid"""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues = []
|
||||
for entity in model.entities:
|
||||
issues += IdentifierConfigRule._validate_entity_identifiers(entity=entity)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity_identifiers(entity: Entity) -> List[ValidationIssueType]:
|
||||
"""Checks validity of composite identifiers"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
for ident in entity.identifiers:
|
||||
if ident.identifiers:
|
||||
context = EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=ident.name
|
||||
),
|
||||
element_type=EntityElementType.IDENTIFIER,
|
||||
)
|
||||
|
||||
for sub_id in ident.identifiers:
|
||||
if sub_id.ref and (sub_id.name or sub_id.expr):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"Both ref and name/expr set in sub identifier of identifier "
|
||||
f"({ident.name}), please set one",
|
||||
)
|
||||
)
|
||||
elif sub_id.ref is not None and sub_id.ref not in [
|
||||
i.name for i in entity.identifiers
|
||||
]:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"Identifier ref must reference an existing identifier by name. "
|
||||
f"No identifier in this entity has name: {sub_id.ref}",
|
||||
)
|
||||
)
|
||||
elif not sub_id.ref and not sub_id.name:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"Must provide either name or ref for sub identifier of identifier "
|
||||
f"with name: {ident.reference.name}",
|
||||
)
|
||||
)
|
||||
|
||||
if sub_id.name:
|
||||
for i in entity.identifiers:
|
||||
if i.name == sub_id.name and i.expr != sub_id.expr:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=f"If sub identifier has same name ({sub_id.name}) "
|
||||
f"as an existing Identifier they must have the same expr",
|
||||
)
|
||||
)
|
||||
break
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class NaturalIdentifierConfigurationRule(ModelValidationRule):
|
||||
"""Ensures that identifiers marked as IdentifierType.NATURAL are configured correctly"""
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity_natural_identifiers(entity: Entity) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
context = EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
)
|
||||
|
||||
natural_identifier_names = set(
|
||||
[
|
||||
identifier.name
|
||||
for identifier in entity.identifiers
|
||||
if identifier.type is IdentifierType.NATURAL
|
||||
]
|
||||
)
|
||||
if len(natural_identifier_names) > 1:
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=f"Entities can have at most one natural identifier, but entity "
|
||||
f"`{entity.name}` has {len(natural_identifier_names)} distinct natural identifiers set! "
|
||||
f"{natural_identifier_names}.",
|
||||
)
|
||||
issues.append(error)
|
||||
if natural_identifier_names and not [
|
||||
dim for dim in entity.dimensions if dim.validity_params
|
||||
]:
|
||||
error = ValidationError(
|
||||
context=context,
|
||||
message=f"The use of `natural` identifiers is currently supported only in conjunction with a validity "
|
||||
f"window defined in the set of time dimensions associated with the entity. entity "
|
||||
f"`{entity.name}` uses a natural identifier ({natural_identifier_names}) but does not define a "
|
||||
f"validity window!",
|
||||
)
|
||||
issues.append(error)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Validate identifiers marked as IdentifierType.NATURAL"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
for entity in model.entities:
|
||||
issues += NaturalIdentifierConfigurationRule._validate_entity_natural_identifiers(
|
||||
entity=entity
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class OnePrimaryIdentifierPerEntityRule(ModelValidationRule):
|
||||
"""Ensures that each entity has only one primary identifier"""
|
||||
|
||||
@staticmethod
|
||||
def _only_one_primary_identifier(entity: Entity) -> List[ValidationIssueType]:
|
||||
primary_identifier_names: MutableSet[str] = set()
|
||||
for identifier in entity.identifiers or []:
|
||||
if identifier.type == IdentifierType.PRIMARY:
|
||||
primary_identifier_names.add(identifier.reference.name)
|
||||
|
||||
if len(primary_identifier_names) > 1:
|
||||
return [
|
||||
ValidationError(
|
||||
message=f"Entities can have only one primary identifier. The entity"
|
||||
f" `{entity.name}` has {len(primary_identifier_names)}: {', '.join(primary_identifier_names)}",
|
||||
context=EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
),
|
||||
)
|
||||
]
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues = []
|
||||
|
||||
for entity in model.entities:
|
||||
issues += OnePrimaryIdentifierPerEntityRule._only_one_primary_identifier(entity=entity)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SubIdentifierContext:
|
||||
"""Organizes the context behind identifiers and their sub-identifiers."""
|
||||
|
||||
entity: Entity
|
||||
identifier_reference: IdentifierReference
|
||||
sub_identifier_names: Tuple[str, ...]
|
||||
|
||||
|
||||
class IdentifierConsistencyRule(ModelValidationRule):
|
||||
"""Checks identifiers with the same name are defined with the same set of sub-identifiers in all entitys"""
|
||||
|
||||
@staticmethod
|
||||
def _get_sub_identifier_names(identifier: Identifier) -> Sequence[str]:
|
||||
sub_identifier_names = []
|
||||
sub_identifier: CompositeSubIdentifier
|
||||
for sub_identifier in identifier.identifiers or []:
|
||||
if sub_identifier.name:
|
||||
sub_identifier_names.append(sub_identifier.name)
|
||||
elif sub_identifier.ref:
|
||||
sub_identifier_names.append(sub_identifier.ref)
|
||||
return sub_identifier_names
|
||||
|
||||
@staticmethod
|
||||
def _get_sub_identifier_context(entity: Entity) -> Sequence[SubIdentifierContext]:
|
||||
contexts = []
|
||||
for identifier in entity.identifiers or []:
|
||||
contexts.append(
|
||||
SubIdentifierContext(
|
||||
entity=entity,
|
||||
identifier_reference=identifier.reference,
|
||||
sub_identifier_names=tuple(
|
||||
IdentifierConsistencyRule._get_sub_identifier_names(identifier)
|
||||
),
|
||||
)
|
||||
)
|
||||
return contexts
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
# build collection of sub-identifier contexts, keyed by identifier name
|
||||
identifier_to_sub_identifier_contexts: DefaultDict[
|
||||
str, List[SubIdentifierContext]
|
||||
] = defaultdict(list)
|
||||
all_contexts: List[SubIdentifierContext] = list(
|
||||
iter_flatten(
|
||||
[
|
||||
IdentifierConsistencyRule._get_sub_identifier_context(entity)
|
||||
for entity in model.entities
|
||||
]
|
||||
)
|
||||
)
|
||||
for context in all_contexts:
|
||||
identifier_to_sub_identifier_contexts[context.identifier_reference.name].append(
|
||||
context
|
||||
)
|
||||
|
||||
# Filter out anything that has fewer than 2 distinct sub-identifier sets
|
||||
invalid_sub_identifier_configurations = dict(
|
||||
filter(
|
||||
lambda item: len(set([context.sub_identifier_names for context in item[1]])) >= 2,
|
||||
identifier_to_sub_identifier_contexts.items(),
|
||||
)
|
||||
)
|
||||
|
||||
# convert each invalid identifier configuration into a validation warning
|
||||
for (
|
||||
identifier_name,
|
||||
sub_identifier_contexts,
|
||||
) in invalid_sub_identifier_configurations.items():
|
||||
entity = sub_identifier_contexts[0].entity
|
||||
issues.append(
|
||||
ValidationWarning(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=identifier_name
|
||||
),
|
||||
element_type=EntityElementType.IDENTIFIER,
|
||||
),
|
||||
message=(
|
||||
f"Identifier '{identifier_name}' does not have consistent sub-identifiers "
|
||||
f"throughout the model: {list(sorted(sub_identifier_contexts, key=lambda x: x.sub_identifier_names))}"
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
454
core/dbt/semantic/validations/measures.py
Normal file
@@ -0,0 +1,454 @@
|
||||
from collections import defaultdict
|
||||
from typing import DefaultDict, Dict, List, Set
|
||||
|
||||
from dbt.semantic.aggregation_properties import AggregationType
|
||||
from dbt.semantic.references import MetricModelReference, MeasureReference
|
||||
from dbt.contracts.graph.dimensions import DimensionType
|
||||
from dbt.contracts.graph.metrics import MetricType
|
||||
from dbt.contracts.graph.nodes import Metric
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityElementContext,
|
||||
EntityElementReference,
|
||||
EntityElementType,
|
||||
MetricContext,
|
||||
ModelValidationRule,
|
||||
ValidationIssueType,
|
||||
ValidationError,
|
||||
ValidationWarning,
|
||||
iter_bucket,
|
||||
)
|
||||
|
||||
|
||||
class MeasureMetricProxyUniqueRule(ModelValidationRule):
|
||||
"""Asserts that measure names and metric names don't match unless measure proxy"""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
metric_names = [metric.name for metric in model.metrics]
|
||||
for entity in model.entities:
|
||||
for measure in entity.measures:
|
||||
if measure.name in metric_names:
|
||||
measure_name_match_index = next(
|
||||
(
|
||||
i
|
||||
for i, metric in enumerate(model.metrics)
|
||||
if metric.name == measure.name
|
||||
)
|
||||
)
|
||||
if measure_name_match_index is not None:
|
||||
if (
|
||||
model.metrics[measure_name_match_index].type
|
||||
!= MetricType.MEASURE_PROXY
|
||||
):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=f"Cannot have metric with the same name as a measure ({measure.name}) that is not a "
|
||||
f"proxy for that measure",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class EntityMeasuresUniqueRule(ModelValidationRule):
|
||||
"""Asserts all measure names are unique across the model."""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
measure_references_to_entities: Dict[MeasureReference, List] = defaultdict(list)
|
||||
for entity in model.entities:
|
||||
for measure in entity.measures:
|
||||
if measure.reference in measure_references_to_entities:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=f"Found measure with name {measure.name} in multiple entitys with names "
|
||||
f"({measure_references_to_entities[measure.reference]})",
|
||||
)
|
||||
)
|
||||
measure_references_to_entities[measure.reference].append(entity.name)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class MeasureConstraintAliasesRule(ModelValidationRule):
|
||||
"""Checks that aliases are configured correctly for constrained measure references
|
||||
|
||||
These are, currently, only applicable for Metric types, since the MetricInputMeasure is only used by metric configurations.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _validate_required_aliases_are_set(
|
||||
metric: Metric, metric_context: MetricContext
|
||||
) -> List[ValidationIssueType]:
|
||||
"""Checks if valid aliases are set on the input measure references where they are required
|
||||
|
||||
Aliases are required whenever there are 2 or more input measures with the same measure
|
||||
reference with different constraints. When this happens, we require aliases for all
|
||||
constrained measures for the sake of clarity. Any unconstrained measure does not
|
||||
need an alias, since it always relies on the original measure specification.
|
||||
|
||||
At this time aliases are required for ratio metrics, but eventually we could relax that requirement
|
||||
if we can find an automatic aliasing scheme for numerator/denominator that we feel comfortable using.
|
||||
"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
if len(metric.measure_references) == len(set(metric.measure_references)):
|
||||
# All measure references are unique, so disambiguation via aliasing is not necessary
|
||||
return issues
|
||||
|
||||
# Note: more_itertools.bucket does not produce empty groups
|
||||
input_measures_by_name = iter_bucket(metric.input_measures, lambda x: x.name)
|
||||
for name in input_measures_by_name:
|
||||
input_measures = list(input_measures_by_name[name])
|
||||
|
||||
if len(input_measures) == 1:
|
||||
continue
|
||||
|
||||
distinct_input_measures = set(input_measures)
|
||||
if len(distinct_input_measures) == 1:
|
||||
# Warn whenever multiple identical references exist - we will consolidate these but it might be
|
||||
# a meaningful oversight if constraints and aliases are specified
|
||||
issues.append(
|
||||
ValidationWarning(
|
||||
context=metric_context,
|
||||
message=(
|
||||
f"Metric {metric.name} has multiple identical input measures specifications for measure "
|
||||
f"{name}. This might be hiding a semantic error. Input measure specification: "
|
||||
f"{input_measures[0]}."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
constrained_measures_without_aliases = [
|
||||
measure
|
||||
for measure in input_measures
|
||||
if measure.constraint is not None and measure.alias is None
|
||||
]
|
||||
if constrained_measures_without_aliases:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=metric_context,
|
||||
message=(
|
||||
f"Metric {metric.name} depends on multiple different constrained versions of measure "
|
||||
f"{name}. In such cases, aliases must be provided, but the following input measures have "
|
||||
f"constraints specified without an alias: {constrained_measures_without_aliases}."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Ensures measures that might need an alias have one set, and that the alias is distinct
|
||||
|
||||
We do not allow aliases to collide with other alias or measure names, since that could create
|
||||
ambiguity at query time or cause issues if users ever restructure their models.
|
||||
"""
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
measure_names = _get_measure_names_from_model(model)
|
||||
measure_alias_to_metrics: DefaultDict[str, List[str]] = defaultdict(list)
|
||||
for metric in model.metrics:
|
||||
metric_context = MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
)
|
||||
|
||||
issues += MeasureConstraintAliasesRule._validate_required_aliases_are_set(
|
||||
metric=metric, metric_context=metric_context
|
||||
)
|
||||
|
||||
aliased_measures = [
|
||||
input_measure
|
||||
for input_measure in metric.input_measures
|
||||
if input_measure.alias is not None
|
||||
]
|
||||
|
||||
for measure in aliased_measures:
|
||||
assert (
|
||||
measure.alias
|
||||
), "Type refinement assertion, previous filter should ensure this is true"
|
||||
issues += UniqueAndValidNameRule.check_valid_name(measure.alias)
|
||||
if measure.alias in measure_names:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=metric_context,
|
||||
message=(
|
||||
f"Alias `{measure.alias}` for measure `{measure.name}` conflicts with measure names "
|
||||
f"defined elsewhere in the model! This can cause ambiguity for certain types of "
|
||||
f"query. Please choose another alias."
|
||||
),
|
||||
)
|
||||
)
|
||||
if measure.alias in measure_alias_to_metrics:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=metric_context,
|
||||
message=(
|
||||
f"Measure alias {measure.alias} conflicts with a measure alias used elsewhere in the "
|
||||
f"model! This can cause ambiguity for certain types of query. Please choose another "
|
||||
f"alias, or, if the measures are constrained in the same way, consider centralizing "
|
||||
f"that definition in a new entity. Measure specification: {measure}. Existing "
|
||||
f"metrics with that measure alias used: {measure_alias_to_metrics[measure.alias]}"
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
measure_alias_to_metrics[measure.alias].append(metric.name)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class MetricMeasuresRule(ModelValidationRule):
|
||||
"""Checks that the measures referenced in the metrics exist."""
|
||||
|
||||
@staticmethod
|
||||
def _validate_metric_measure_references(
|
||||
metric: Metric, valid_measure_names: Set[str]
|
||||
) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for measure_reference in metric.measure_references:
|
||||
if measure_reference.name not in valid_measure_names:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message=(
|
||||
f"Measure {measure_reference.name} referenced in metric {metric.name} is not "
|
||||
f"defined in the model!"
|
||||
),
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
valid_measure_names = _get_measure_names_from_model(model)
|
||||
|
||||
for metric in model.metrics or []:
|
||||
issues += MetricMeasuresRule._validate_metric_measure_references(
|
||||
metric=metric, valid_measure_names=valid_measure_names
|
||||
)
|
||||
return issues
|
||||
|
||||
|
||||
class MeasuresNonAdditiveDimensionRule(ModelValidationRule):
|
||||
"""Checks that the measure's non_additive_dimensions are properly defined."""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
for entity in model.entities or []:
|
||||
for measure in entity.measures:
|
||||
non_additive_dimension = measure.non_additive_dimension
|
||||
if non_additive_dimension is None:
|
||||
continue
|
||||
agg_time_dimension = next(
|
||||
(
|
||||
dim
|
||||
for dim in entity.dimensions
|
||||
if measure.checked_agg_time_dimension.name == dim.name
|
||||
),
|
||||
None,
|
||||
)
|
||||
if agg_time_dimension is None:
|
||||
# Sanity check, should never hit this
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a agg_time_dimension of {measure.checked_agg_time_dimension.name} "
|
||||
f"that is not defined as a dimension in entity '{entity.name}'."
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
# Validates that the non_additive_dimension exists as a time dimension in the entity
|
||||
matching_dimension = next(
|
||||
(dim for dim in entity.dimensions if non_additive_dimension.name == dim.name),
|
||||
None,
|
||||
)
|
||||
if matching_dimension is None:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' "
|
||||
f"that is not defined as a dimension in entity '{entity.name}'."
|
||||
),
|
||||
)
|
||||
)
|
||||
if matching_dimension:
|
||||
# Check that it's a time dimension
|
||||
if matching_dimension.type != DimensionType.TIME:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' "
|
||||
f"that is defined as a categorical dimension which is not supported."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Validates that the non_additive_dimension time_granularity is >= agg_time_dimension time_granularity
|
||||
if (
|
||||
matching_dimension.type_params
|
||||
and agg_time_dimension.type_params
|
||||
and (
|
||||
matching_dimension.type_params.time_granularity
|
||||
!= agg_time_dimension.type_params.time_granularity
|
||||
)
|
||||
):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' that has "
|
||||
f"a base time granularity ({matching_dimension.type_params.time_granularity.name}) that is not equal to the measure's "
|
||||
f"agg_time_dimension {agg_time_dimension.name} with a base granularity of ({agg_time_dimension.type_params.time_granularity.name})."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Validates that the window_choice is either MIN/MAX
|
||||
if non_additive_dimension.window_choice not in {
|
||||
AggregationType.MIN,
|
||||
AggregationType.MAX,
|
||||
}:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a non_additive_dimension with an invalid 'window_choice' of '{non_additive_dimension.window_choice.value}'. "
|
||||
f"Only choices supported are 'min' or 'max'."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Validates that all window_groupings are identifiers
|
||||
identifiers_in_entity = {identifier.name for identifier in entity.identifiers}
|
||||
window_groupings = set(non_additive_dimension.window_groupings)
|
||||
intersected_identifiers = window_groupings.intersection(identifiers_in_entity)
|
||||
if len(intersected_identifiers) != len(window_groupings):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=(
|
||||
f"Measure '{measure.name}' has a non_additive_dimension with an invalid 'window_groupings'. "
|
||||
f"These identifiers {window_groupings.difference(intersected_identifiers)} do not exist in the entity."
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class CountAggregationExprRule(ModelValidationRule):
|
||||
"""Checks that COUNT measures have an expr provided."""
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for entity in model.entities:
|
||||
for measure in entity.measures:
|
||||
context = EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
)
|
||||
if measure.agg == AggregationType.COUNT and measure.expr is None:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Measure '{measure.name}' uses a COUNT aggregation, which requires an expr to be provided. "
|
||||
f"Provide 'expr: 1' if a count of all rows is desired."
|
||||
),
|
||||
)
|
||||
)
|
||||
if (
|
||||
measure.agg == AggregationType.COUNT
|
||||
and measure.expr
|
||||
and measure.expr.lower().startswith("distinct ")
|
||||
):
|
||||
# TODO: Expand this to include SUM and potentially AVG agg types as well
|
||||
# Note expansion of this guard requires the addition of sum_distinct and avg_distinct agg types
|
||||
# or else an adjustment to the error message below.
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=context,
|
||||
message=(
|
||||
f"Measure '{measure.name}' uses a '{measure.agg.value}' aggregation with a DISTINCT expr: "
|
||||
f"'{measure.expr}. This is not supported, as it effectively converts an additive "
|
||||
f"measure into a non-additive one, and this could cause certain queries to return "
|
||||
f"incorrect results. Please use the {measure.agg.value}_distinct aggregation type."
|
||||
),
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
|
||||
def _get_measure_names_from_model(model: UserConfiguredModel) -> Set[str]:
|
||||
"""Return every distinct measure name specified in the model"""
|
||||
measure_names = set()
|
||||
for entity in model.entities:
|
||||
for measure in entity.measures:
|
||||
measure_names.add(measure.reference.name)
|
||||
|
||||
return measure_names
|
||||
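As a rough usage sketch, the helper above feeds the metric-level checks; `model` is assumed to be a parsed UserConfiguredModel:

# Illustrative sketch only.
valid_measure_names = _get_measure_names_from_model(model)
for metric in model.metrics or []:
    issues = MetricMeasuresRule._validate_metric_measure_references(
        metric=metric, valid_measure_names=valid_measure_names
    )
    # any ValidationError here means the metric references a measure no entity defines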
143
core/dbt/semantic/validations/metrics.py
Normal file
@@ -0,0 +1,143 @@
|
||||
import traceback
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
from dbt.exceptions import DbtSemanticValidationError
|
||||
from dbt.semantic.references import MetricModelReference
|
||||
from dbt.contracts.graph.nodes import Metric
|
||||
from dbt.contracts.graph.metrics import MetricType, MetricTimeWindow
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
MetricContext,
|
||||
ModelValidationRule,
|
||||
ValidationIssueType,
|
||||
ValidationError,
|
||||
)
|
||||
|
||||
|
||||
class CumulativeMetricRule(ModelValidationRule):
|
||||
"""Checks that cumulative sum metrics are configured properly"""
|
||||
|
||||
@staticmethod
|
||||
def _validate_cumulative_sum_metric_params(metric: Metric) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
if metric.type == MetricType.CUMULATIVE:
|
||||
if metric.type_params.window and metric.type_params.grain_to_date:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message="Both window and grain_to_date set for cumulative metric. Please set one or the other",
|
||||
)
|
||||
)
|
||||
|
||||
if metric.type_params.window:
|
||||
try:
|
||||
MetricTimeWindow.parse(metric.type_params.window.to_string())
|
||||
except DbtSemanticValidationError as e:
|
||||
|
||||
if sys.version_info >= (3, 10):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message="".join(
|
||||
traceback.format_exception_only(e)
|
||||
),
|
||||
extra_detail="".join(traceback.format_tb(e.__traceback__)),
|
||||
)
|
||||
)
|
||||
elif sys.version_info < (3, 10):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message="".join(
|
||||
traceback.format_exception_only(etype=type(e), value=e)
|
||||
),
|
||||
extra_detail="".join(traceback.format_tb(e.__traceback__)),
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for metric in model.metrics or []:
|
||||
issues += CumulativeMetricRule._validate_cumulative_sum_metric_params(metric=metric)
|
||||
|
||||
return issues
|
||||
|
||||
|
||||
class DerivedMetricRule(ModelValidationRule):
|
||||
"""Checks that derived metrics are configured properly"""
|
||||
|
||||
@staticmethod
|
||||
def _validate_alias_collision(metric: Metric) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
if metric.type == MetricType.DERIVED:
|
||||
used_names = {input_metric.name for input_metric in metric.input_metrics}
|
||||
for input_metric in metric.input_metrics:
|
||||
if input_metric.alias:
|
||||
issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias)
|
||||
if input_metric.alias in used_names:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"Alias '{input_metric.alias}' for input metric: '{input_metric.name}' is already being used. Please choose another alias.",
|
||||
)
|
||||
)
|
||||
used_names.add(input_metric.alias)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_input_metrics_exist(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
all_metrics = {m.name for m in model.metrics}
|
||||
for metric in model.metrics:
|
||||
if metric.type == MetricType.DERIVED:
|
||||
for input_metric in metric.input_metrics:
|
||||
if input_metric.name not in all_metrics:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not exist as a configured metric in the model.",
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_time_offset_params(metric: Metric) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for input_metric in metric.input_metrics or []:
|
||||
if input_metric.offset_window and input_metric.offset_to_grain:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message=f"Both offset_window and offset_to_grain set for derived metric '{metric.name}' on input metric '{input_metric.name}'. Please set one or the other.",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
issues += DerivedMetricRule._validate_input_metrics_exist(model=model)
|
||||
for metric in model.metrics or []:
|
||||
issues += DerivedMetricRule._validate_alias_collision(metric=metric)
|
||||
issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
|
||||
return issues
|
||||
102
core/dbt/semantic/validations/model_validator.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
import copy
|
||||
from typing import List, Sequence
|
||||
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.agg_time_dimensions import AggregationTimeDimensionRule
|
||||
from dbt.semantic.validations.entities import (
|
||||
EntityTimeDimensionWarningsRule,
|
||||
EntityValidityWindowRule,
|
||||
)
|
||||
from dbt.semantic.validations.dimension_const import DimensionConsistencyRule
|
||||
from dbt.semantic.validations.element_const import ElementConsistencyRule
|
||||
from dbt.semantic.validations.identifiers import (
|
||||
IdentifierConfigRule,
|
||||
IdentifierConsistencyRule,
|
||||
NaturalIdentifierConfigurationRule,
|
||||
OnePrimaryIdentifierPerEntityRule,
|
||||
)
|
||||
from dbt.semantic.validations.measures import (
|
||||
CountAggregationExprRule,
|
||||
EntityMeasuresUniqueRule,
|
||||
MeasureConstraintAliasesRule,
|
||||
MetricMeasuresRule,
|
||||
MeasuresNonAdditiveDimensionRule,
|
||||
MeasureMetricProxyUniqueRule,
|
||||
)
|
||||
from dbt.semantic.validations.metrics import CumulativeMetricRule, DerivedMetricRule
|
||||
from dbt.semantic.validations.reserved_keywords import ReservedKeywordsRule
|
||||
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
ModelValidationResults,
|
||||
ModelValidationRule,
|
||||
ModelValidationException,
|
||||
ModelBuildResult,
|
||||
)
|
||||
|
||||
|
||||
class ModelValidator:
|
||||
"""A Validator that acts on UserConfiguredModel"""
|
||||
|
||||
DEFAULT_RULES = (
|
||||
DerivedMetricRule(),
|
||||
CountAggregationExprRule(),
|
||||
EntityMeasuresUniqueRule(),
|
||||
EntityTimeDimensionWarningsRule(),
|
||||
EntityValidityWindowRule(),
|
||||
DimensionConsistencyRule(),
|
||||
ElementConsistencyRule(),
|
||||
IdentifierConfigRule(),
|
||||
IdentifierConsistencyRule(),
|
||||
NaturalIdentifierConfigurationRule(),
|
||||
OnePrimaryIdentifierPerEntityRule(),
|
||||
MeasureConstraintAliasesRule(),
|
||||
MetricMeasuresRule(),
|
||||
CumulativeMetricRule(),
|
||||
UniqueAndValidNameRule(),
|
||||
AggregationTimeDimensionRule(),
|
||||
ReservedKeywordsRule(),
|
||||
MeasuresNonAdditiveDimensionRule(),
|
||||
MeasureMetricProxyUniqueRule(),
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self, rules: Sequence[ModelValidationRule] = DEFAULT_RULES, max_workers: int = 1
|
||||
) -> None:
|
||||
"""Constructor.
|
||||
|
||||
Args:
|
||||
rules: List of validation rules to run. Defaults to DEFAULT_RULES
|
||||
max_workers: sets the max number of rules to run against the model concurrently
|
||||
"""
|
||||
|
||||
# Raises an error if 'rules' is an empty sequence or None
|
||||
if not rules:
|
||||
raise ValueError(
|
||||
"ModelValidator 'rules' must be a sequence with at least one ModelValidationRule."
|
||||
)
|
||||
|
||||
self._rules = rules
|
||||
self._executor = ProcessPoolExecutor(max_workers=max_workers)
|
||||
|
||||
def validate_model(self, model: UserConfiguredModel) -> ModelBuildResult:
|
||||
"""Validate a model according to configured rules."""
|
||||
|
||||
issues: List[ModelValidationResults] = []
|
||||
|
||||
for rule in self._rules:
|
||||
issues.append(ModelValidationResults.from_issues_sequence(rule.validate_model(model)))
|
||||
|
||||
return ModelBuildResult(model=model, issues=ModelValidationResults.merge(issues))
|
||||
|
||||
def checked_validations(
|
||||
self, model: UserConfiguredModel
|
||||
) -> UserConfiguredModel: # chTODO: remember checked_build
|
||||
"""Similar to validate(), but throws an exception if validation fails."""
|
||||
model_copy = copy.deepcopy(model)
|
||||
build_result = self.validate_model(model_copy)
|
||||
|
||||
if build_result.issues.has_blocking_issues:
|
||||
raise ModelValidationException(issues=tuple(build_result.issues.all_issues))
|
||||
|
||||
return model
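# Illustrative usage sketch (not part of the module): `example_model` is assumed to be a
# UserConfiguredModel that has already been parsed elsewhere.
def _example_validate(example_model: UserConfiguredModel) -> None:
    validator = ModelValidator()  # runs DEFAULT_RULES with a single worker
    build_result = validator.validate_model(example_model)
    print(build_result.issues.summary())
    try:
        # Raises ModelValidationException if any blocking (ERROR-level) issues are found.
        validator.checked_validations(example_model)
    except ModelValidationException as e:
        print(e)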
|
||||
53
core/dbt/semantic/validations/not_empty.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# TODO: Delete this rule as we don't need to enforce that these nodes are present
|
||||
|
||||
|
||||
# from typing import List
|
||||
|
||||
# from dbt.contracts.graph.manifest import UserConfiguredModel
|
||||
# from dbt.semantic.validations.validator_helpers import (
|
||||
# ModelValidationRule,
|
||||
# ValidationError,
|
||||
# ValidationIssueType,
|
||||
# )
|
||||
|
||||
|
||||
# class NonEmptyRule(ModelValidationRule):
|
||||
# """Check if the model contains data sources and metrics."""
|
||||
|
||||
# @staticmethod
|
||||
# def _check_model_has_data_sources(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
# issues: List[ValidationIssueType] = []
|
||||
# if not model.entities:
|
||||
# issues.append(
|
||||
# ValidationError(
|
||||
# message="No entities present in the model.",
|
||||
# )
|
||||
# )
|
||||
# return issues
|
||||
|
||||
# @staticmethod
|
||||
# def _check_model_has_metrics(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
# issues: List[ValidationIssueType] = []
|
||||
|
||||
# # If we are going to generate measure proxy metrics that is sufficient as well
|
||||
# create_measure_proxy_metrics = False
|
||||
# for data_source in model.data_sources:
|
||||
# for measure in data_source.measures:
|
||||
# if measure.create_metric is True:
|
||||
# create_measure_proxy_metrics = True
|
||||
# break
|
||||
|
||||
# if not model.metrics and not create_measure_proxy_metrics:
|
||||
# issues.append(
|
||||
# ValidationError(
|
||||
# message="No metrics present in the model.",
|
||||
# )
|
||||
# )
|
||||
# return issues
|
||||
|
||||
# @staticmethod
|
||||
# def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
# issues: List[ValidationIssueType] = []
|
||||
# issues += NonEmptyRule._check_model_has_data_sources(model=model)
|
||||
# issues += NonEmptyRule._check_model_has_metrics(model=model)
|
||||
# return issues
|
||||
139
core/dbt/semantic/validations/reserved_keywords.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from typing import List
|
||||
from dbt.semantic.references import EntityElementReference
|
||||
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
ModelValidationRule,
|
||||
ValidationError,
|
||||
ValidationIssueType,
|
||||
)
|
||||
|
||||
# A non-exhaustive tuple of reserved keywords
|
||||
# This list was created by running an intersection of keywords for redshift,
|
||||
# postgres, bigquery, and snowflake
|
||||
RESERVED_KEYWORDS = (
|
||||
"and",
|
||||
"as",
|
||||
"create",
|
||||
"distinct",
|
||||
"for",
|
||||
"from",
|
||||
"full",
|
||||
"having",
|
||||
"in",
|
||||
"inner",
|
||||
"into",
|
||||
"is",
|
||||
"join",
|
||||
"left",
|
||||
"like",
|
||||
"natural",
|
||||
"not",
|
||||
"null",
|
||||
"on",
|
||||
"or",
|
||||
"order",
|
||||
"right",
|
||||
"select",
|
||||
"union",
|
||||
"using",
|
||||
"where",
|
||||
"with",
|
||||
)
|
||||
|
||||
|
||||
class ReservedKeywordsRule(ModelValidationRule):
|
||||
"""Check that any element that ends up being selected by name (instead of expr) isn't a commonly reserved keyword.
|
||||
|
||||
Note: This rule DOES NOT catch all keywords. That is because keywords are
|
||||
engine specific, and semantic validations are not engine specific. I.e. if
|
||||
you change your underlying data warehouse engine, semantic validations
|
||||
should still pass, but your data warehouse validations might fail. However,
|
||||
data warehouse validations are slow in comparison to semantic validation
|
||||
rules. Thus this rule is intended to catch words that are reserved keywords
|
||||
in all supported engines and to fail fast. E.g., `USER` is a reserved keyword
|
||||
in Redshift but not in all other supported engines. Therefore if one is
|
||||
using Redshift and sets a dimension name to `user`, the config would pass
|
||||
this rule, but would then fail Data Warehouse Validations.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity_sub_elements(entity: Entity) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
if entity.dimensions:
|
||||
for dimension in entity.dimensions:
|
||||
if dimension.name.lower() in RESERVED_KEYWORDS:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=dimension.name
|
||||
),
|
||||
element_type=EntityElementType.DIMENSION,
|
||||
),
|
||||
message=f"'{dimension.name}' is an SQL reserved keyword, and thus cannot be used as a dimension 'name'.",
|
||||
)
|
||||
)
|
||||
|
||||
if entity.identifiers:
|
||||
for identifier in entity.identifiers:
|
||||
if identifier.is_composite:
|
||||
msg = "'{name}' is an SQL reserved keyword, and thus cannot be used as a sub-identifier 'name'"
|
||||
names = [
|
||||
sub_ident.name
|
||||
for sub_ident in identifier.identifiers
|
||||
if sub_ident.name is not None
|
||||
]
|
||||
else:
|
||||
msg = "'{name}' is an SQL reserved keyword, and thus cannot be used as an identifier 'name'"
|
||||
names = [identifier.name]
|
||||
|
||||
for name in names:
|
||||
if name.lower() in RESERVED_KEYWORDS:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=identifier.name
|
||||
),
|
||||
element_type=EntityElementType.IDENTIFIER,
|
||||
),
|
||||
message=msg.format(name=name),
|
||||
)
|
||||
)
|
||||
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
if measure.name.lower() in RESERVED_KEYWORDS:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
message=f"'{measure.name}' is an SQL reserved keyword, and thus cannot be used as an measure 'name'.",
|
||||
)
|
||||
)
|
||||
|
||||
return issues
|
||||
|
||||
@classmethod
|
||||
def _validate_entities(cls, model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Checks names of objects that are not nested."""
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for entity in model.entities:
|
||||
issues += cls._validate_entity_sub_elements(entity=entity)
|
||||
|
||||
return issues
|
||||
|
||||
@classmethod
|
||||
def validate_model(cls, model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
return cls._validate_entities(model=model)
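# Illustrative only (would normally live in a test): the rule lower-cases each name and
# checks membership in RESERVED_KEYWORDS above.
assert "select".lower() in RESERVED_KEYWORDS  # rejected as a dimension/identifier/measure name
assert "ORDER".lower() in RESERVED_KEYWORDS  # casing does not help
assert "revenue".lower() not in RESERVED_KEYWORDS  # an ordinary name passes this rule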
|
||||
213
core/dbt/semantic/validations/unique_valid_name.py
Normal file
@@ -0,0 +1,213 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import re
|
||||
from typing import Dict, Tuple, List
|
||||
from dbt.semantic.references import (
|
||||
EntityElementReference,
|
||||
EntityReference,
|
||||
MetricModelReference,
|
||||
ElementReference,
|
||||
)
|
||||
|
||||
from dbt.contracts.graph.nodes import Entity
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.validations.validator_helpers import (
|
||||
EntityContext,
|
||||
EntityElementContext,
|
||||
EntityElementType,
|
||||
MetricContext,
|
||||
ModelValidationRule,
|
||||
ValidationContext,
|
||||
ValidationError,
|
||||
ValidationIssueType,
|
||||
)
|
||||
from dbt.semantic.object_utils import assert_values_exhausted
|
||||
from dbt.semantic.time import TimeGranularity
|
||||
|
||||
|
||||
@enum.unique
|
||||
class SemanticReservedKeywords(enum.Enum):
|
||||
"""Enumeration of reserved keywords with helper for accessing the reason they are reserved"""
|
||||
|
||||
METRIC_TIME = "metric_time"
|
||||
DBT_INTERNAL_UUID = "dbt_internal_uuid"
|
||||
|
||||
@staticmethod
|
||||
def get_reserved_reason(keyword: SemanticReservedKeywords) -> str:
|
||||
"""Get the reason a given keyword is reserved. Guarantees an exhaustive switch"""
|
||||
if keyword is SemanticReservedKeywords.METRIC_TIME:
|
||||
return (
|
||||
"Used as the query input for creating time series metrics from measures with "
|
||||
"different time dimension names."
|
||||
)
|
||||
elif keyword is SemanticReservedKeywords.DBT_INTERNAL_UUID:
|
||||
return "Used internally to reference a column that has a uuid generated by dbt."
|
||||
else:
|
||||
assert_values_exhausted(keyword)
|
||||
|
||||
|
||||
class UniqueAndValidNameRule(ModelValidationRule):
|
||||
"""Check that names are unique and valid.
|
||||
|
||||
* Names of elements in data sources are unique / valid within the data source.
|
||||
* Names of data sources, dimension sets, metric sets, and materializations in the model are unique / valid.
|
||||
"""
|
||||
|
||||
NAME_REGEX = re.compile(r"\A[a-z][a-z0-9_]*[a-z0-9]\Z")
|
||||
|
||||
@staticmethod
|
||||
def check_valid_name(name: str) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
if not UniqueAndValidNameRule.NAME_REGEX.match(name):
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"Invalid name `{name}` - names should only consist of lower case letters, numbers, "
|
||||
f"and underscores. In addition, names should start with a lower case letter, and should not end "
|
||||
f"with an underscore, and they must be at least 2 characters long.",
|
||||
)
|
||||
)
|
||||
if name.upper() in TimeGranularity.list_names():
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"Invalid name `{name}` - names cannot match reserved time granularity keywords "
|
||||
f"({TimeGranularity.list_names()})",
|
||||
)
|
||||
)
|
||||
if name.lower() in {reserved_name.value for reserved_name in SemanticReservedKeywords}:
|
||||
reason = SemanticReservedKeywords.get_reserved_reason(
|
||||
SemanticReservedKeywords(name.lower())
|
||||
)
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"Invalid name `{name}` - this name is reserved by MetricFlow. Reason: {reason}",
|
||||
)
|
||||
)
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_entity_elements(entity: Entity) -> List[ValidationIssueType]:
|
||||
issues: List[ValidationIssueType] = []
|
||||
element_info_tuples: List[Tuple[ElementReference, str, ValidationContext]] = []
|
||||
|
||||
if entity.measures:
|
||||
for measure in entity.measures:
|
||||
element_info_tuples.append(
|
||||
(
|
||||
measure.reference,
|
||||
"measure",
|
||||
EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=measure.name
|
||||
),
|
||||
element_type=EntityElementType.MEASURE,
|
||||
),
|
||||
)
|
||||
)
|
||||
if entity.identifiers:
|
||||
for identifier in entity.identifiers:
|
||||
element_info_tuples.append(
|
||||
(
|
||||
identifier.reference,
|
||||
"identifier",
|
||||
EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=identifier.name
|
||||
),
|
||||
element_type=EntityElementType.IDENTIFIER,
|
||||
),
|
||||
)
|
||||
)
|
||||
if entity.dimensions:
|
||||
for dimension in entity.dimensions:
|
||||
element_info_tuples.append(
|
||||
(
|
||||
dimension.reference,
|
||||
"dimension",
|
||||
EntityElementContext(
|
||||
entity_element=EntityElementReference(
|
||||
entity_name=entity.name, name=dimension.name
|
||||
),
|
||||
element_type=EntityElementType.DIMENSION,
|
||||
),
|
||||
)
|
||||
)
|
||||
name_to_type: Dict[ElementReference, str] = {}
|
||||
|
||||
for name, _type, context in element_info_tuples:
|
||||
if name in name_to_type:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"In entity `{entity.name}`, can't use name `{name.name}` for a "
|
||||
f"{_type} when it was already used for a {name_to_type[name]}",
|
||||
)
|
||||
)
|
||||
else:
|
||||
name_to_type[name] = _type
|
||||
|
||||
for name, _type, context in element_info_tuples:
|
||||
issues += UniqueAndValidNameRule.check_valid_name(name=name.name)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def _validate_top_level_objects(model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Checks names of objects that are not nested."""
|
||||
object_info_tuples = []
|
||||
if model.entities:
|
||||
for entity in model.entities:
|
||||
object_info_tuples.append(
|
||||
(
|
||||
entity.name,
|
||||
"entity",
|
||||
EntityContext(
|
||||
entity=EntityReference(entity_name=entity.name),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
name_to_type: Dict[str, str] = {}
|
||||
|
||||
issues: List[ValidationIssueType] = []
|
||||
|
||||
for name, type_, context in object_info_tuples:
|
||||
if name in name_to_type:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
message=f"Can't use name `{name}` for a {type_} when it was already used for a "
|
||||
f"{name_to_type[name]}",
|
||||
)
|
||||
)
|
||||
else:
|
||||
name_to_type[name] = type_
|
||||
|
||||
if model.metrics:
|
||||
metric_names = set()
|
||||
for metric in model.metrics:
|
||||
if metric.name in metric_names:
|
||||
issues.append(
|
||||
ValidationError(
|
||||
context=MetricContext(
|
||||
metric=MetricModelReference(metric_name=metric.name),
|
||||
),
|
||||
message=f"Can't use name `{metric.name}` for a metric when it was already used for a metric",
|
||||
)
|
||||
)
|
||||
else:
|
||||
metric_names.add(metric.name)
|
||||
|
||||
for name, _type, context in object_info_tuples:
|
||||
issues += UniqueAndValidNameRule.check_valid_name(name=name)
|
||||
|
||||
return issues
|
||||
|
||||
@staticmethod
|
||||
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
|
||||
issues = []
|
||||
issues += UniqueAndValidNameRule._validate_top_level_objects(model=model)
|
||||
|
||||
for entity in model.entities:
|
||||
issues += UniqueAndValidNameRule._validate_entity_elements(entity=entity)
|
||||
|
||||
return issues
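# Illustrative only (would normally live in a test): how UniqueAndValidNameRule.NAME_REGEX
# treats a few candidate names. Reserved time-granularity and MetricFlow keywords are
# checked separately in check_valid_name.
assert UniqueAndValidNameRule.NAME_REGEX.match("order_total")  # accepted
assert not UniqueAndValidNameRule.NAME_REGEX.match("OrderTotal")  # upper case letters rejected
assert not UniqueAndValidNameRule.NAME_REGEX.match("order_")  # trailing underscore rejected
assert not UniqueAndValidNameRule.NAME_REGEX.match("x")  # must be at least 2 characters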
|
||||
439
core/dbt/semantic/validations/validator_helpers.py
Normal file
@@ -0,0 +1,439 @@
|
||||
from __future__ import annotations
|
||||
from collections import defaultdict, deque
|
||||
|
||||
import click
|
||||
import functools
|
||||
import traceback
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
|
||||
from dbt.dataclass_schema import dbtClassMixin
|
||||
|
||||
from itertools import chain
|
||||
|
||||
from dbt.semantic.references import (
|
||||
EntityElementReference,
|
||||
EntityReference,
|
||||
MetricModelReference,
|
||||
)
|
||||
from dbt.contracts.graph.dimensions import DimensionType
|
||||
from dbt.semantic.user_configured_model import UserConfiguredModel
|
||||
from dbt.semantic.object_utils import assert_values_exhausted
|
||||
|
||||
VALIDATE_SAFELY_ERROR_STR_TMPLT = (
|
||||
". Issue occurred in method `{method_name}` called with {arguments_str}"
|
||||
)
|
||||
ValidationContextJSON = Dict[str, Union[str, int, None]]
|
||||
ValidationIssueJSON = Dict[str, Union[str, int, ValidationContextJSON]]
|
||||
|
||||
|
||||
class ValidationIssueLevel(Enum):
|
||||
"""Categorize the issues found while validating a MQL model."""
|
||||
|
||||
# Issue should be fixed, but model will still work in MQL
|
||||
WARNING = 0
|
||||
# Issue doesn't prevent model from working in MQL yet, but will eventually be an error
|
||||
FUTURE_ERROR = 1
|
||||
# Issue will prevent the model from working in MQL
|
||||
ERROR = 2
|
||||
|
||||
@property
|
||||
def name_plural(self) -> str:
|
||||
"""Controlled pluralization of ValidationIssueLevel name value"""
|
||||
|
||||
return f"{self.name}S"
|
||||
|
||||
|
||||
ISSUE_COLOR_MAP = {
|
||||
ValidationIssueLevel.WARNING: "cyan",
|
||||
ValidationIssueLevel.ERROR: "bright_red",
|
||||
ValidationIssueLevel.FUTURE_ERROR: "bright_yellow",
|
||||
}
|
||||
|
||||
|
||||
class EntityElementType(Enum):
|
||||
"""Maps entity element types to a readable string."""
|
||||
|
||||
MEASURE = "measure"
|
||||
DIMENSION = "dimension"
|
||||
IDENTIFIER = "identifier"
|
||||
|
||||
|
||||
@dataclass
|
||||
class MetricContext(dbtClassMixin):
|
||||
"""The context class for validation issues involving metrics"""
|
||||
|
||||
metric: MetricModelReference
|
||||
|
||||
def context_str(self) -> str:
|
||||
"""Human readable stringified representation of the context"""
|
||||
return f"With metric `{self.metric.metric_name}`"
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntityContext(dbtClassMixin):
|
||||
"""The context class for validation issues involving entities"""
|
||||
|
||||
entity: EntityReference
|
||||
|
||||
def context_str(self) -> str:
|
||||
"""Human readable stringified representation of the context"""
|
||||
return f"With entity `{self.entity.entity_name}`"
|
||||
|
||||
|
||||
@dataclass
|
||||
class EntityElementContext(dbtClassMixin):
|
||||
"""The context class for validation issues involving dimensions"""
|
||||
|
||||
entity_element: EntityElementReference
|
||||
element_type: EntityElementType
|
||||
|
||||
def context_str(self) -> str:
|
||||
"""Human readable stringified representation of the context"""
|
||||
return f"With {self.element_type.value} `{self.entity_element.name}` in entity `{self.entity_element.entity_name}`"
|
||||
|
||||
|
||||
ValidationContext = Union[
|
||||
MetricContext,
|
||||
EntityContext,
|
||||
EntityElementContext,
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationIssue(dbtClassMixin):
|
||||
"""The abstract base ValidationIsssue class that the specific ValidationIssue classes are built from"""
|
||||
|
||||
message: str
|
||||
context: Optional[ValidationContext] = None
|
||||
extra_detail: Optional[str] = None
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def level(self) -> ValidationIssueLevel:
|
||||
"""The level of of ValidationIssue"""
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def as_readable_str(self, verbose: bool = False, prefix: Optional[str] = None) -> str:
|
||||
"""Return a easily readable string that can be used to log the issue."""
|
||||
prefix = prefix or self.level.name
|
||||
|
||||
# The following is two lines instead of one line because
|
||||
# technically self.context.context_str() can return an empty str
|
||||
context_str = self.context.context_str() if self.context else ""
|
||||
context_str += " - " if context_str != "" else ""
|
||||
|
||||
issue_str = f"{prefix}: {context_str}{self.message}"
|
||||
if verbose and self.extra_detail is not None:
|
||||
issue_str += f"\n{self.extra_detail}"
|
||||
|
||||
return issue_str
|
||||
|
||||
def as_cli_formatted_str(self, verbose: bool = False) -> str:
|
||||
"""Returns a color-coded readable string for rendering issues in the CLI"""
|
||||
return self.as_readable_str(
|
||||
verbose=verbose,
|
||||
prefix=click.style(self.level.name, bold=True, fg=ISSUE_COLOR_MAP[self.level]),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationWarning(ValidationIssue, dbtClassMixin):
|
||||
"""A warning that was found while validating the model."""
|
||||
|
||||
@property
|
||||
def level(self) -> ValidationIssueLevel: # noqa: D
|
||||
return ValidationIssueLevel.WARNING
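# Illustrative only (would normally live in a test): a warning with no context renders as
# "<LEVEL>: <message>" via as_readable_str().
_example_warning = ValidationWarning(message="dimension `ds` has no type set")
assert _example_warning.as_readable_str() == "WARNING: dimension `ds` has no type set"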
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationFutureError(ValidationIssue, dbtClassMixin):
|
||||
"""A future error that was found while validating the model."""
|
||||
|
||||
error_date: date = date(year=2030, month=1, day=1)
|
||||
|
||||
@property
|
||||
def level(self) -> ValidationIssueLevel: # noqa: D
|
||||
return ValidationIssueLevel.FUTURE_ERROR
|
||||
|
||||
def as_readable_str(self, verbose: bool = False, prefix: Optional[str] = None) -> str:
|
||||
"""Return a easily readable string that can be used to log the issue."""
|
||||
return (
|
||||
f"{super().as_readable_str(verbose=verbose, prefix=prefix)}"
|
||||
f"IMPORTANT: this error will break your model starting {self.error_date.strftime('%b %d, %Y')}. "
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationError(ValidationIssue, dbtClassMixin):
|
||||
"""An error that was found while validating the model."""
|
||||
|
||||
@property
|
||||
def level(self) -> ValidationIssueLevel: # noqa: D
|
||||
return ValidationIssueLevel.ERROR
|
||||
|
||||
|
||||
ValidationIssueType = Union[ValidationWarning, ValidationFutureError, ValidationError]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelValidationResults(dbtClassMixin):
|
||||
"""Class for organizating the results of running validations"""
|
||||
|
||||
warnings: Tuple[ValidationWarning, ...] = tuple()
|
||||
future_errors: Tuple[ValidationFutureError, ...] = tuple()
|
||||
errors: Tuple[ValidationError, ...] = tuple()
|
||||
|
||||
@property
|
||||
def has_blocking_issues(self) -> bool:
|
||||
"""Does the ModelValidationResults have ERROR issues"""
|
||||
return len(self.errors) != 0
|
||||
|
||||
@classmethod
|
||||
def from_issues_sequence(cls, issues: Sequence[ValidationIssueType]) -> ModelValidationResults:
|
||||
"""Constructs a ModelValidationResults class from a list of ValidationIssues"""
|
||||
|
||||
warnings: List[ValidationWarning] = []
|
||||
future_errors: List[ValidationFutureError] = []
|
||||
errors: List[ValidationError] = []
|
||||
|
||||
for issue in issues:
|
||||
if issue.level is ValidationIssueLevel.WARNING:
|
||||
warnings.append(issue)
|
||||
elif issue.level is ValidationIssueLevel.FUTURE_ERROR:
|
||||
future_errors.append(issue)
|
||||
elif issue.level is ValidationIssueLevel.ERROR:
|
||||
errors.append(issue)
|
||||
else:
|
||||
assert_values_exhausted(issue.level)
|
||||
return cls(
|
||||
warnings=tuple(warnings), future_errors=tuple(future_errors), errors=tuple(errors)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def merge(cls, results: Sequence[ModelValidationResults]) -> ModelValidationResults:
|
||||
"""Creates a new ModelValidatorResults instance from multiple instances
|
||||
|
||||
This is useful when there are multiple validators that are run and the
|
||||
combined results are desirable. For instance, there is a ModelValidator
|
||||
and a DataWarehouseModelValidator. These both return validation issues.
|
||||
If it's desirable to combine the results, the following makes it easy.
|
||||
"""
|
||||
|
||||
if not isinstance(results, List):
|
||||
results = list(results)
|
||||
|
||||
# this nested comprehension syntax is a little disorienting
|
||||
# basically [element for object in list_of_objects for element in object.list_property]
|
||||
# translates to "for each element in an object's list for each object in a list of objects"
|
||||
warnings = tuple(issue for result in results for issue in result.warnings)
|
||||
future_errors = tuple(issue for result in results for issue in result.future_errors)
|
||||
errors = tuple(issue for result in results for issue in result.errors)
|
||||
|
||||
return cls(
|
||||
warnings=warnings,
|
||||
future_errors=future_errors,
|
||||
errors=errors,
|
||||
)
|
||||
|
||||
@property
|
||||
def all_issues(self) -> Tuple[ValidationIssueType, ...]:
|
||||
"""For when a singular list of issues is needed"""
|
||||
return self.errors + self.future_errors + self.warnings
|
||||
|
||||
def summary(self) -> str:
|
||||
"""Returns a stylized summary string for issues"""
|
||||
|
||||
errors = click.style(
|
||||
text=f"{ValidationIssueLevel.ERROR.name_plural}: {len(self.errors)}",
|
||||
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.ERROR],
|
||||
)
|
||||
future_errors = click.style(
|
||||
text=f"{ValidationIssueLevel.FUTURE_ERROR.name_plural}: {len(self.future_errors)}",
|
||||
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.FUTURE_ERROR],
|
||||
)
|
||||
warnings = click.style(
|
||||
text=f"{ValidationIssueLevel.WARNING.name_plural}: {len(self.warnings)}",
|
||||
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.WARNING],
|
||||
)
|
||||
return f"{errors}, {future_erros}, {warnings}"
|
||||
|
||||
|
||||
def generate_exception_issue(
|
||||
what_was_being_done: str,
|
||||
e: Exception,
|
||||
context: Optional[ValidationContext] = None,
|
||||
extras: Optional[Dict[str, str]] = None,
|
||||
) -> ValidationIssueType:
|
||||
"""Generates a validation issue for exceptions"""
|
||||
if "stacktrace" not in extras:
|
||||
extras["stacktrace"] = "".join(traceback.format_tb(e.__traceback__))
|
||||
|
||||
return ValidationError(
|
||||
context=context,
|
||||
message=f"An error occured while {what_was_being_done} - {''.join(traceback.format_exception_only(etype=type(e), value=e))}",
|
||||
extra_detail="\n".join([f"{key}: {value}" for key, value in extras.items()]),
|
||||
)
|
||||
|
||||
|
||||
def _func_args_to_string(*args: Any, **kwargs: Any) -> str: # type: ignore
|
||||
return f"positional args: {args}, key word args: {kwargs}"
|
||||
|
||||
|
||||
def validate_safely(whats_being_done: str) -> Callable:
|
||||
"""Decorator to safely run validation checks"""
|
||||
|
||||
def decorator_check_element_safely(func: Callable) -> Callable: # noqa
|
||||
@functools.wraps(func)
|
||||
def wrapper(*args: Any, **kwargs: Any) -> List[ValidationIssueType]: # type: ignore
|
||||
"""Safely run a check on model elements"""
|
||||
issues: List[ValidationIssueType]
|
||||
try:
|
||||
issues = func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
arguments_str = _func_args_to_string(*args, **kwargs)
|
||||
issues = [
|
||||
generate_exception_issue(
|
||||
what_was_being_done=whats_being_done,
|
||||
e=e,
|
||||
extras={"method_name": func.__name__, "passed_args": arguments_str},
|
||||
)
|
||||
]
|
||||
return issues
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator_check_element_safely
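# Illustrative only (would normally live in a test): a hypothetical check wrapped with
# validate_safely, so an unexpected exception becomes a single ValidationError instead of
# crashing the validation run.
@validate_safely(whats_being_done="running an example check")
def _example_check() -> List[ValidationIssueType]:
    raise RuntimeError("boom")

_example_issues = _example_check()
assert len(_example_issues) == 1
assert _example_issues[0].level is ValidationIssueLevel.ERROR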
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DimensionInvariants:
|
||||
"""Helper object to ensure consistent dimension attributes across entities.
|
||||
|
||||
All dimensions with a given name in all entities should have attributes matching these values.
|
||||
"""
|
||||
|
||||
type: DimensionType
|
||||
is_partition: bool
|
||||
|
||||
|
||||
class ModelValidationRule(dbtClassMixin):
|
||||
"""Encapsulates logic for checking the values of objects in a model."""
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def validate_model(cls, model: UserConfiguredModel) -> List[ValidationIssueType]:
|
||||
"""Check the given model and return a list of validation issues"""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def validate_model_serialized_for_multiprocessing(cls, serialized_model: str):
|
||||
"""Validate a model serialized via Pydantic's .json() method, and return a list of JSON serialized issues
|
||||
|
||||
This method exists because our validations are forked into parallel processes via
|
||||
multiprocessing.ProcessPoolExecutor, and passing a model or validation results object can result in
|
||||
idiosyncratic behavior and inscrutable errors due to interactions between pickling and pydantic objects.
|
||||
"""
|
||||
# TODO: Fix? This might be broken
|
||||
return ModelValidationResults.from_issues_sequence(
|
||||
cls.validate_model(UserConfiguredModel.parse_raw(serialized_model))
|
||||
).to_dict()
|
||||
|
||||
|
||||
class ModelValidationException(Exception):
|
||||
"""Exception raised when validation of a model fails."""
|
||||
|
||||
def __init__(self, issues: Tuple[ValidationIssueType, ...]) -> None: # noqa: D
|
||||
issues_str = "\n".join([x.as_readable_str(verbose=True) for x in issues])
|
||||
super().__init__(f"Error validating model. Issues:\n{issues_str}")
|
||||
|
||||
|
||||
class iter_bucket:
|
||||
"""
|
||||
NOTE: Copied over from more_itertools but we don't want the dependency.
|
||||
|
||||
Wrap *iterable* and return an object that buckets the iterable into
|
||||
child iterables based on a *key* function.
|
||||
"""
|
||||
|
||||
def __init__(self, iterable, key, validator=None):
|
||||
self._it = iter(iterable)
|
||||
self._key = key
|
||||
self._cache = defaultdict(deque)
|
||||
self._validator = validator or (lambda x: True)
|
||||
|
||||
def __contains__(self, value):
|
||||
if not self._validator(value):
|
||||
return False
|
||||
|
||||
try:
|
||||
item = next(self[value])
|
||||
except StopIteration:
|
||||
return False
|
||||
else:
|
||||
self._cache[value].appendleft(item)
|
||||
|
||||
return True
|
||||
|
||||
def _get_values(self, value):
|
||||
"""
|
||||
Helper to yield items from the parent iterator that match *value*.
|
||||
Items that don't match are stored in the local cache as they
|
||||
are encountered.
|
||||
"""
|
||||
while True:
|
||||
# If we've cached some items that match the target value, emit
|
||||
# the first one and evict it from the cache.
|
||||
if self._cache[value]:
|
||||
yield self._cache[value].popleft()
|
||||
# Otherwise we need to advance the parent iterator to search for
|
||||
# a matching item, caching the rest.
|
||||
else:
|
||||
while True:
|
||||
try:
|
||||
item = next(self._it)
|
||||
except StopIteration:
|
||||
return
|
||||
item_value = self._key(item)
|
||||
if item_value == value:
|
||||
yield item
|
||||
break
|
||||
elif self._validator(item_value):
|
||||
self._cache[item_value].append(item)
|
||||
|
||||
def __iter__(self):
|
||||
for item in self._it:
|
||||
item_value = self._key(item)
|
||||
if self._validator(item_value):
|
||||
self._cache[item_value].append(item)
|
||||
|
||||
yield from self._cache.keys()
|
||||
|
||||
def __getitem__(self, value):
|
||||
if not self._validator(value):
|
||||
return iter(())
|
||||
|
||||
return self._get_values(value)
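# Illustrative usage (would normally live in a test): iter_bucket lazily groups items from
# one iterable into child iterables keyed by the given function.
_buckets = iter_bucket(["ant", "bee", "cat", "ape"], key=lambda word: word[0])
assert list(_buckets["a"]) == ["ant", "ape"]
assert list(_buckets["b"]) == ["bee"]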
|
||||
|
||||
|
||||
def iter_flatten(list_of_lists):
|
||||
"""
|
||||
NOTE: Copied over from more_itertools but we don't want the dependency.
|
||||
|
||||
Return an iterator flattening one level of nesting in a list of lists.
|
||||
>>> list(iter_flatten([[0, 1], [2, 3]]))
|
||||
[0, 1, 2, 3]
|
||||
See also :func:`collapse`, which can flatten multiple levels of nesting.
|
||||
"""
|
||||
return chain.from_iterable(list_of_lists)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModelBuildResult: # noqa: D
|
||||
model: UserConfiguredModel
|
||||
# Issues found in the model.
|
||||
issues: ModelValidationResults = ModelValidationResults()
|
||||
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
|
||||
from dbt.contracts.graph.nodes import Exposure, SourceDefinition, Metric
|
||||
from dbt.contracts.graph.nodes import Exposure, SourceDefinition, Metric, Entity
|
||||
from dbt.flags import get_flags
|
||||
from dbt.graph import ResourceTypeSelector
|
||||
from dbt.task.runnable import GraphRunnableTask
|
||||
@@ -27,6 +27,7 @@ class ListTask(GraphRunnableTask):
|
||||
NodeType.Source,
|
||||
NodeType.Exposure,
|
||||
NodeType.Metric,
|
||||
NodeType.Entity,
|
||||
)
|
||||
)
|
||||
ALL_RESOURCE_VALUES = DEFAULT_RESOURCE_VALUES | frozenset((NodeType.Analysis,))
|
||||
@@ -73,6 +74,8 @@ class ListTask(GraphRunnableTask):
|
||||
yield self.manifest.exposures[node]
|
||||
elif node in self.manifest.metrics:
|
||||
yield self.manifest.metrics[node]
|
||||
elif node in self.manifest.entities:
|
||||
yield self.manifest.entities[node]
|
||||
else:
|
||||
raise DbtRuntimeError(
|
||||
f'Got an unexpected result from node selection: "{node}"'
|
||||
@@ -96,6 +99,11 @@ class ListTask(GraphRunnableTask):
|
||||
# metrics are searched for by pkg.metric_name
|
||||
metric_selector = ".".join([node.package_name, node.name])
|
||||
yield f"metric:{metric_selector}"
|
||||
elif node.resource_type == NodeType.Entity:
|
||||
assert isinstance(node, Entity)
|
||||
# entities are searched for by pkg.entity_name
|
||||
entity_selector = ".".join([node.package_name, node.name])
|
||||
yield f"entity:{entity_selector}"
|
||||
else:
|
||||
# everything else is from `fqn`
|
||||
yield ".".join(node.fqn)
|
||||
|
||||
21
core/dbt/task/validate.py
Normal file
@@ -0,0 +1,21 @@
|
||||
# This task is intended to validate a semantic layer
|
||||
from dbt.task.runnable import GraphRunnableTask
|
||||
from dbt.events.types import DebugCmdOut
|
||||
from dbt.events.functions import fire_event
|
||||
|
||||
|
||||
class ValidateTask(GraphRunnableTask):
|
||||
def __init__(self, args, config, manifest):
|
||||
super().__init__(args, config, manifest)
|
||||
|
||||
def run(self):
|
||||
fire_event(DebugCmdOut(msg="Starting validation."))
|
||||
|
||||
GraphRunnableTask.load_manifest(self)
|
||||
|
||||
breakpoint()
|
||||
|
||||
fire_event(DebugCmdOut(msg="Callum still has more todos"))
|
||||
|
||||
fire_event(DebugCmdOut(msg="Validation completed!"))
|
||||
fire_event(DebugCmdOut(msg="Done."))
|
||||
@@ -64,6 +64,7 @@ setup(
|
||||
"typing-extensions>=3.7.4",
|
||||
"werkzeug>=1,<3",
|
||||
"pathspec>=0.9,<0.11",
|
||||
"mo-sql-parsing==9.328.23003",
|
||||
# the following are all to match snowflake-connector-python
|
||||
"requests<3.0.0",
|
||||
"idna>=2.5,<4",
|
||||
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -8,6 +8,7 @@
|
||||
"docs",
|
||||
"exposures",
|
||||
"metrics",
|
||||
"entities",
|
||||
"selectors"
|
||||
],
|
||||
"properties": {
|
||||
@@ -85,6 +86,13 @@
|
||||
},
|
||||
"description": "The metrics defined in the dbt project and its dependencies"
|
||||
},
|
||||
"entities": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/definitions/Entity"
|
||||
},
|
||||
"description": "The entities defined in the dbt project and its dependencies"
|
||||
},
|
||||
"selectors": {
|
||||
"type": "object",
|
||||
"description": "The selectors defined in selectors.yml"
|
||||
@@ -173,7 +181,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, dbt.contracts.graph.nodes.SourceDefinition]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]])",
|
||||
"description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], entities: Mapping[str, dbt.contracts.graph.nodes.Entity], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, dbt.contracts.graph.nodes.SourceDefinition]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]])",
|
||||
"definitions": {
|
||||
"ManifestMetadata": {
|
||||
"type": "object",
|
||||
@@ -406,7 +414,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.353436
|
||||
"default": 1670902215.970579
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -460,6 +468,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -504,7 +522,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "AnalysisNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "AnalysisNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"FileHash": {
|
||||
"type": "object",
|
||||
@@ -953,7 +971,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.355371
|
||||
"default": 1670902215.973521
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -1007,6 +1025,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -1051,7 +1079,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "SingularTestNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "SingularTestNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"TestConfig": {
|
||||
"type": "object",
|
||||
@@ -1312,7 +1340,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.356482
|
||||
"default": 1670902215.975156
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -1366,6 +1394,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -1420,7 +1458,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "HookNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, index: Optional[int] = None)"
|
||||
"description": "HookNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, index: Optional[int] = None)"
|
||||
},
|
||||
"ModelNode": {
|
||||
"type": "object",
|
||||
@@ -1569,7 +1607,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.357701
|
||||
"default": 1670902215.976732
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -1623,6 +1661,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -1667,7 +1715,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "ModelNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "ModelNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"RPCNode": {
|
||||
"type": "object",
|
||||
@@ -1816,7 +1864,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.358761
|
||||
"default": 1670902215.978195
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -1870,6 +1918,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -1914,7 +1972,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "RPCNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "RPCNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"SqlNode": {
|
||||
"type": "object",
|
||||
@@ -2063,7 +2121,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.359803
|
||||
"default": 1670902215.979718
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -2117,6 +2175,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -2161,7 +2229,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "SqlNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "SqlNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"GenericTestNode": {
|
||||
"type": "object",
|
||||
@@ -2306,7 +2374,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.361009
|
||||
"default": 1670902215.981434
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -2360,6 +2428,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -2424,7 +2502,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "GenericTestNode(test_metadata: dbt.contracts.graph.nodes.TestMetadata, database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, column_name: Optional[str] = None, file_key_name: Optional[str] = None)"
|
||||
"description": "GenericTestNode(test_metadata: dbt.contracts.graph.nodes.TestMetadata, database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, column_name: Optional[str] = None, file_key_name: Optional[str] = None)"
|
||||
},
|
||||
"TestMetadata": {
|
||||
"type": "object",
|
||||
@@ -2577,7 +2655,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.364386
|
||||
"default": 1670902215.984685
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -2631,6 +2709,16 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
@@ -2675,7 +2763,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "SnapshotNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SnapshotConfig, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
"description": "SnapshotNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SnapshotConfig, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
|
||||
},
|
||||
"SnapshotConfig": {
|
||||
"type": "object",
|
||||
@@ -3030,7 +3118,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.366245
|
||||
"default": 1670902215.987447
|
||||
},
|
||||
"config_call_dict": {
|
||||
"type": "object",
|
||||
@@ -3068,7 +3156,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "SeedNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SeedConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', root_path: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.MacroDependsOn = <factory>)"
|
||||
"description": "SeedNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SeedConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', root_path: Optional[str] = None)"
|
||||
},
|
||||
"SeedConfig": {
|
||||
"type": "object",
|
||||
@@ -3416,7 +3504,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.368067
|
||||
"default": 1670902215.989922
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
@@ -3526,12 +3614,12 @@
|
||||
},
|
||||
"dbt_version": {
|
||||
"type": "string",
|
||||
"default": "1.4.1"
|
||||
"default": "1.4.0a1"
|
||||
},
|
||||
"generated_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"default": "2023-02-09T10:04:47.347023Z"
|
||||
"default": "2022-12-13T03:30:15.961825Z"
|
||||
},
|
||||
"invocation_id": {
|
||||
"oneOf": [
|
||||
@@ -3542,7 +3630,7 @@
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"default": "f795bc66-f417-4007-af6e-f2e513d33790"
|
||||
"default": "4f2b967b-7e02-46de-a7ea-268a05e3fab1"
|
||||
},
|
||||
"env": {
|
||||
"type": "object",
|
||||
@@ -3553,7 +3641,7 @@
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "FreshnessMetadata(dbt_schema_version: str = <factory>, dbt_version: str = '1.4.1', generated_at: datetime.datetime = <factory>, invocation_id: Optional[str] = <factory>, env: Dict[str, str] = <factory>)"
|
||||
"description": "FreshnessMetadata(dbt_schema_version: str = <factory>, dbt_version: str = '1.4.0a1', generated_at: datetime.datetime = <factory>, invocation_id: Optional[str] = <factory>, env: Dict[str, str] = <factory>)"
|
||||
},
|
||||
"SourceFreshnessRuntimeError": {
|
||||
"type": "object",
|
||||
@@ -3895,7 +3983,7 @@
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.368656
|
||||
"default": 1670902215.990816
|
||||
},
|
||||
"supported_languages": {
|
||||
"oneOf": [
|
||||
@@ -4136,13 +4224,23 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.369866
|
||||
"default": 1670902215.993354
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Exposure(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], type: dbt.contracts.graph.unparsed.ExposureType, owner: dbt.contracts.graph.unparsed.ExposureOwner, description: str = '', label: Optional[str] = None, maturity: Optional[dbt.contracts.graph.unparsed.MaturityType] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.ExposureConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, url: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
|
||||
"description": "Exposure(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], type: dbt.contracts.graph.unparsed.ExposureType, owner: dbt.contracts.graph.unparsed.ExposureOwner, description: str = '', label: Optional[str] = None, maturity: Optional[dbt.contracts.graph.unparsed.MaturityType] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.ExposureConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, url: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, entities: List[List[str]] = <factory>, created_at: float = <factory>)"
|
||||
},
|
||||
"ExposureOwner": {
|
||||
"type": "object",
|
||||
@@ -4353,13 +4451,23 @@
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1675937087.371092
|
||||
"default": 1670902215.995033
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Metric(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], description: str, label: str, calculation_method: str, expression: str, filters: List[dbt.contracts.graph.unparsed.MetricFilter], time_grains: List[str], dimensions: List[str], timestamp: Optional[str] = None, window: Optional[dbt.contracts.graph.unparsed.MetricTime] = None, model: Optional[str] = None, model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.MetricConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
|
||||
"description": "Metric(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], description: str, label: str, calculation_method: str, timestamp: str, expression: str, filters: List[dbt.contracts.graph.unparsed.MetricFilter], time_grains: List[str], dimensions: List[str], window: Optional[dbt.contracts.graph.unparsed.MetricTime] = None, model: Optional[str] = None, model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.MetricConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
|
||||
},
|
||||
"MetricFilter": {
|
||||
"type": "object",
|
||||
@@ -4427,6 +4535,148 @@
|
||||
},
|
||||
"additionalProperties": true,
|
||||
"description": "MetricConfig(_extra: Dict[str, Any] = <factory>, enabled: bool = True)"
|
||||
},
|
||||
"Entity": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"name",
|
||||
"resource_type",
|
||||
"package_name",
|
||||
"path",
|
||||
"original_file_path",
|
||||
"unique_id",
|
||||
"fqn",
|
||||
"model",
|
||||
"description",
|
||||
"dimensions"
|
||||
],
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"resource_type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"entity"
|
||||
]
|
||||
},
|
||||
"package_name": {
|
||||
"type": "string"
|
||||
},
|
||||
"path": {
|
||||
"type": "string"
|
||||
},
|
||||
"original_file_path": {
|
||||
"type": "string"
|
||||
},
|
||||
"unique_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"fqn": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"dimensions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model_unique_id": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"meta": {
|
||||
"type": "object",
|
||||
"default": {}
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"config": {
|
||||
"$ref": "#/definitions/EntityConfig",
|
||||
"default": {
|
||||
"enabled": true
|
||||
}
|
||||
},
|
||||
"unrendered_config": {
|
||||
"type": "object",
|
||||
"default": {}
|
||||
},
|
||||
"sources": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"depends_on": {
|
||||
"$ref": "#/definitions/DependsOn",
|
||||
"default": {
|
||||
"macros": [],
|
||||
"nodes": []
|
||||
}
|
||||
},
|
||||
"refs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"entities": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"created_at": {
|
||||
"type": "number",
|
||||
"default": 1674510977.805523
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"description": "Entity(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], model: str, description: str, dimensions: List[str], model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.EntityConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, entities: List[List[str]] = <factory>, created_at: float = <factory>)"
|
||||
},
|
||||
"EntityConfig": {
|
||||
"type": "object",
|
||||
"required": [],
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
}
|
||||
},
|
||||
"additionalProperties": true,
|
||||
"description": "EntityConfig(_extra: Dict[str, Any] = <factory>, enabled: bool = True)"
|
||||
}
|
||||
},
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
|
||||
@@ -35,6 +35,7 @@ def basic_uncompiled_model():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
deferred=False,
|
||||
description='',
|
||||
@@ -67,6 +68,7 @@ def basic_compiled_model():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
deferred=True,
|
||||
description='',
|
||||
@@ -124,6 +126,7 @@ def basic_uncompiled_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities':[],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'deferred': False,
|
||||
@@ -173,6 +176,7 @@ def basic_compiled_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities':[],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'deferred': True,
|
||||
@@ -363,6 +367,7 @@ def basic_uncompiled_schema_test_node():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
deferred=False,
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
@@ -396,6 +401,7 @@ def basic_compiled_schema_test_node():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
deferred=False,
|
||||
description='',
|
||||
@@ -435,6 +441,7 @@ def basic_uncompiled_schema_test_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities':[],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'description': '',
|
||||
@@ -485,6 +492,7 @@ def basic_compiled_schema_test_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'deferred': False,
|
||||
'database': 'test_db',
|
||||
|
||||
@@ -10,6 +10,8 @@ from dbt.contracts.graph.model_config import (
|
||||
SnapshotConfig,
|
||||
SourceConfig,
|
||||
ExposureConfig,
|
||||
MetricConfig,
|
||||
EntityConfig,
|
||||
EmptySnapshotConfig,
|
||||
Hook,
|
||||
)
|
||||
@@ -24,6 +26,7 @@ from dbt.contracts.graph.nodes import (
|
||||
Macro,
|
||||
Exposure,
|
||||
Metric,
|
||||
Entity,
|
||||
SeedNode,
|
||||
Docs,
|
||||
MacroDependsOn,
|
||||
@@ -142,6 +145,7 @@ def base_parsed_model_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'description': '',
|
||||
@@ -191,6 +195,7 @@ def basic_parsed_model_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -241,6 +246,7 @@ def complex_parsed_model_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
|
||||
'database': 'test_db',
|
||||
'deferred': True,
|
||||
@@ -301,6 +307,7 @@ def complex_parsed_model_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.test.bar']),
|
||||
deferred=True,
|
||||
description='My parsed node',
|
||||
@@ -734,6 +741,7 @@ def patched_model_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='The foo model',
|
||||
database='test_db',
|
||||
@@ -794,6 +802,7 @@ def base_parsed_hook_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'deferred': False,
|
||||
@@ -842,6 +851,7 @@ def base_parsed_hook_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
deferred=False,
|
||||
@@ -872,6 +882,7 @@ def complex_parsed_hook_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
|
||||
'deferred': False,
|
||||
'database': 'test_db',
|
||||
@@ -931,6 +942,7 @@ def complex_parsed_hook_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.test.bar']),
|
||||
description='My parsed node',
|
||||
deferred=False,
|
||||
@@ -1025,6 +1037,7 @@ def basic_parsed_schema_test_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'deferred': False,
|
||||
'database': 'test_db',
|
||||
@@ -1072,6 +1085,7 @@ def basic_parsed_schema_test_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -1101,6 +1115,7 @@ def complex_parsed_schema_test_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
|
||||
'database': 'test_db',
|
||||
'deferred': False,
|
||||
@@ -1165,6 +1180,7 @@ def complex_parsed_schema_test_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.test.bar']),
|
||||
description='My parsed node',
|
||||
database='test_db',
|
||||
@@ -1463,6 +1479,7 @@ def basic_timestamp_snapshot_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'deferred': False,
|
||||
'database': 'test_db',
|
||||
@@ -1522,6 +1539,7 @@ def basic_timestamp_snapshot_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -1570,6 +1588,7 @@ def basic_intermediate_timestamp_snapshot_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -1605,6 +1624,7 @@ def basic_check_snapshot_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'depends_on': {'macros': [], 'nodes': []},
|
||||
'database': 'test_db',
|
||||
'deferred': False,
|
||||
@@ -1664,6 +1684,7 @@ def basic_check_snapshot_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -1712,6 +1733,7 @@ def basic_intermediate_check_snapshot_object():
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
description='',
|
||||
database='test_db',
|
||||
@@ -2152,6 +2174,7 @@ def basic_parsed_exposure_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'fqn': ['test', 'exposures', 'my_exposure'],
|
||||
'unique_id': 'exposure.test.my_exposure',
|
||||
'package_name': 'test',
|
||||
@@ -2214,6 +2237,7 @@ def complex_parsed_exposure_dict():
|
||||
'refs': [],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'fqn': ['test', 'exposures', 'my_exposure'],
|
||||
'unique_id': 'exposure.test.my_exposure',
|
||||
'package_name': 'test',
|
||||
@@ -2288,47 +2312,24 @@ def test_compare_changed_exposure(func, basic_parsed_exposure_object):
|
||||
|
||||
|
||||
# METRICS
|
||||
@pytest.fixture
|
||||
def minimal_parsed_metric_dict():
|
||||
return {
|
||||
'name': 'my_metric',
|
||||
'type': 'count',
|
||||
'timestamp': 'created_at',
|
||||
'time_grains': ['day'],
|
||||
'fqn': ['test', 'metrics', 'my_metric'],
|
||||
'unique_id': 'metric.test.my_metric',
|
||||
'package_name': 'test',
|
||||
'meta': {},
|
||||
'tags': [],
|
||||
'path': 'models/something.yml',
|
||||
'original_file_path': 'models/something.yml',
|
||||
'description': '',
|
||||
'created_at': 1.0,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def basic_parsed_metric_dict():
|
||||
return {
|
||||
'name': 'new_customers',
|
||||
'label': 'New Customers',
|
||||
'model': 'ref("dim_customers")',
|
||||
'model': "ref('dim_customers')",
|
||||
'calculation_method': 'count',
|
||||
'expression': 'user_id',
|
||||
'timestamp': 'signup_date',
|
||||
'time_grains': ['day', 'week', 'month'],
|
||||
'dimensions': ['plan', 'country'],
|
||||
'filters': [
|
||||
{
|
||||
"field": "is_paying",
|
||||
"value": "true",
|
||||
"operator": "=",
|
||||
}
|
||||
],
|
||||
'filters': [],
|
||||
'resource_type': 'metric',
|
||||
'refs': [['dim_customers']],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'fqn': ['test', 'metrics', 'my_metric'],
|
||||
'unique_id': 'metric.test.my_metric',
|
||||
'package_name': 'test',
|
||||
@@ -2337,26 +2338,98 @@ def basic_parsed_metric_dict():
|
||||
'description': '',
|
||||
'meta': {},
|
||||
'tags': [],
|
||||
'created_at': 1,
|
||||
'depends_on': {
|
||||
'nodes': [],
|
||||
'macros': [],
|
||||
},
|
||||
'config': {
|
||||
'enabled': True,
|
||||
},
|
||||
'unrendered_config': {},
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def basic_parsed_metric_object():
|
||||
return Metric(
|
||||
name='new_customers',
|
||||
resource_type=NodeType.Metric,
|
||||
model="ref('dim_customers')",
|
||||
label='New Customers',
|
||||
calculation_method='count',
|
||||
expression="user_id",
|
||||
timestamp='signup_date',
|
||||
time_grains=['day','week','month'],
|
||||
dimensions=['plan','country'],
|
||||
filters=[],
|
||||
refs=[['dim_customers']],
|
||||
fqn=['test', 'metrics', 'my_metric'],
|
||||
unique_id='metric.test.my_metric',
|
||||
package_name='test',
|
||||
path='models/something.yml',
|
||||
original_file_path='models/something.yml',
|
||||
description='',
|
||||
meta={},
|
||||
tags=[],
|
||||
config=MetricConfig(),
|
||||
unrendered_config={},
|
||||
)
|
||||
|
||||
def test_simple_parsed_metric(basic_parsed_metric_dict, basic_parsed_metric_object):
|
||||
assert_symmetric(basic_parsed_metric_object, basic_parsed_metric_dict, Metric)
|
||||
|
||||
# ENTITIES
|
||||
|
||||
@pytest.fixture
|
||||
def basic_parsed_entity_dict():
|
||||
return {
|
||||
'name': 'my_entity',
|
||||
'model': "ref('my_model')",
|
||||
'dimensions': [],
|
||||
'resource_type': 'entity',
|
||||
'refs': [['my_model']],
|
||||
'sources': [],
|
||||
'metrics': [],
|
||||
'entities': [],
|
||||
'fqn': ['test', 'entities', 'my_entity'],
|
||||
'unique_id': 'entity.test.my_entity',
|
||||
'package_name': 'test',
|
||||
'path': 'models/something.yml',
|
||||
'original_file_path': 'models/something.yml',
|
||||
'description': '',
|
||||
'meta': {},
|
||||
'tags': [],
|
||||
'created_at': 1.0,
|
||||
'depends_on': {
|
||||
'nodes': [],
|
||||
'macros': [],
|
||||
},
|
||||
'config': {
|
||||
'enabled': True,
|
||||
},
|
||||
'unrendered_config': {},
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def basic_parsed_metric_object():
|
||||
return Metric(
|
||||
name='my_metric',
|
||||
resource_type=NodeType.Metric,
|
||||
calculation_method='count',
|
||||
fqn=['test', 'metrics', 'my_metric'],
|
||||
unique_id='metric.test.my_metric',
|
||||
def basic_parsed_entity_object():
|
||||
return Entity(
|
||||
name='my_entity',
|
||||
model="ref('my_model')",
|
||||
dimensions=[],
|
||||
resource_type=NodeType.Entity,
|
||||
fqn=['test', 'entities', 'my_entity'],
|
||||
refs=[['my_model']],
|
||||
unique_id='entity.test.my_entity',
|
||||
package_name='test',
|
||||
path='models/something.yml',
|
||||
original_file_path='models/something.yml',
|
||||
description='',
|
||||
meta={},
|
||||
tags=[]
|
||||
tags=[],
|
||||
config=EntityConfig(),
|
||||
unrendered_config={},
|
||||
)
|
||||
|
||||
def test_simple_parsed_entity(basic_parsed_entity_dict, basic_parsed_entity_object):
|
||||
assert_symmetric(basic_parsed_entity_object, basic_parsed_entity_dict, Entity)
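The entity fixtures above mirror the existing metric fixtures, and test_simple_parsed_entity relies on the shared assert_symmetric helper. As a rough sketch (the real helper lives in the shared test utilities and may differ in detail), the check amounts to a dict/object round trip:

def assert_symmetric_sketch(obj, dct, cls):
    # Serializing the parsed object should reproduce the reference dict...
    assert obj.to_dict(omit_none=True) == dct
    # ...and deserializing the dict should give back an equal object.
    assert cls.from_dict(dct) == obj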
|
||||
@@ -786,3 +786,49 @@ class TestUnparsedMetric(ContractTestCase):
|
||||
tst = self.get_ok_dict()
|
||||
tst['tags'] = [123]
|
||||
self.assert_fails_validation(tst)
|
||||
|
||||
|
||||
|
||||
|
||||
class TestUnparsedEntity(ContractTestCase):
|
||||
ContractType = UnparsedEntity
|
||||
|
||||
def get_ok_dict(self):
|
||||
return {
|
||||
'name': 'my_entity',
|
||||
'model': "ref('my_model')",
|
||||
'description': 'my model',
|
||||
'dimensions': ['plan', 'country'],
|
||||
'config': {},
|
||||
'tags': [],
|
||||
'meta': {},
|
||||
}
|
||||
|
||||
def test_ok(self):
|
||||
entity = self.ContractType(
|
||||
name='my_entity',
|
||||
model="ref('my_model')",
|
||||
description="my model",
|
||||
dimensions=['plan', 'country'],
|
||||
config={},
|
||||
tags=[],
|
||||
meta={},
|
||||
)
|
||||
dct = self.get_ok_dict()
|
||||
self.assert_symmetric(entity, dct)
|
||||
pickle.loads(pickle.dumps(entity))
|
||||
|
||||
def test_bad_entity_no_name(self):
|
||||
tst = self.get_ok_dict()
|
||||
del tst['name']
|
||||
self.assert_fails_validation(tst)
|
||||
|
||||
def test_bad_entity_no_model(self):
|
||||
tst = self.get_ok_dict()
|
||||
del tst['model']
|
||||
self.assert_fails_validation(tst)
|
||||
|
||||
def test_bad_tags(self):
|
||||
tst = self.get_ok_dict()
|
||||
tst['tags'] = [123]
|
||||
self.assert_fails_validation(tst)
|
||||
|
||||
@@ -127,6 +127,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[['multi']],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
fqn=['root', 'my_metric'],
|
||||
unique_id='metric.root.my_metric',
|
||||
package_name='root',
|
||||
@@ -160,6 +161,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -182,6 +184,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -204,6 +207,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[['events']],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.root.events']),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -226,6 +230,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[['events']],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.root.dep']),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -248,6 +253,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[['events']],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.root.events']),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -270,6 +276,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[['events']],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(nodes=['model.root.nested', 'model.root.sibling']),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -304,6 +311,8 @@ class ManifestTest(unittest.TestCase):
|
||||
exposure.validate(exposure.to_dict(omit_none=True))
|
||||
for metric in self.metrics.values():
|
||||
metric.validate(metric.to_dict(omit_none=True))
|
||||
for entity in self.entities.values():
|
||||
entity.validate(entity.to_dict(omit_none=True))
|
||||
for node in self.nested_nodes.values():
|
||||
node.validate(node.to_dict(omit_none=True))
|
||||
for source in self.sources.values():
|
||||
@@ -319,7 +328,7 @@ class ManifestTest(unittest.TestCase):
|
||||
def test__no_nodes(self):
|
||||
manifest = Manifest(
|
||||
nodes={}, sources={}, macros={}, docs={}, disabled={}, files={},
|
||||
exposures={}, metrics={}, selectors={},
|
||||
exposures={}, metrics={}, selectors={}, entities={},
|
||||
metadata=ManifestMetadata(generated_at=datetime.utcnow()),
|
||||
)
|
||||
|
||||
@@ -354,7 +363,7 @@ class ManifestTest(unittest.TestCase):
|
||||
nodes = copy.copy(self.nested_nodes)
|
||||
manifest = Manifest(
|
||||
nodes=nodes, sources={}, macros={}, docs={}, disabled={}, files={},
|
||||
exposures={}, metrics={}, selectors={},
|
||||
exposures={}, metrics={}, entities={}, selectors={},
|
||||
metadata=ManifestMetadata(generated_at=datetime.utcnow()),
|
||||
)
|
||||
serialized = manifest.writable_manifest().to_dict(omit_none=True)
|
||||
@@ -431,12 +440,14 @@ class ManifestTest(unittest.TestCase):
|
||||
flat_exposures = flat_graph['exposures']
|
||||
flat_groups = flat_graph['groups']
|
||||
flat_metrics = flat_graph['metrics']
|
||||
flat_entities = flat_graph['entities']
|
||||
flat_nodes = flat_graph['nodes']
|
||||
flat_sources = flat_graph['sources']
|
||||
self.assertEqual(set(flat_graph), set(['exposures', 'groups', 'nodes', 'sources', 'metrics', 'entities']))
|
||||
self.assertEqual(set(flat_exposures), set(self.exposures))
|
||||
self.assertEqual(set(flat_groups), set(self.groups))
|
||||
self.assertEqual(set(flat_metrics), set(self.metrics))
|
||||
self.assertEqual(set(flat_entities), set(self.entities))
|
||||
self.assertEqual(set(flat_nodes), set(self.nested_nodes))
|
||||
self.assertEqual(set(flat_sources), set(self.sources))
|
||||
for node in flat_nodes.values():
|
||||
@@ -532,11 +543,14 @@ class ManifestTest(unittest.TestCase):
|
||||
)
|
||||
manifest = Manifest(nodes=nodes, sources=self.sources, macros={}, docs={},
|
||||
disabled={}, files={}, exposures=self.exposures,
|
||||
metrics=self.metrics, selectors={})
|
||||
metrics=self.metrics, entities=self.entities, selectors={})
|
||||
expect = {
|
||||
'metrics': frozenset([
|
||||
('root', 'my_metric')
|
||||
]),
|
||||
'entities': frozenset([
|
||||
('root', 'my_entity')
|
||||
]),
|
||||
'exposures': frozenset([
|
||||
('root', 'my_exposure')
|
||||
]),
|
||||
@@ -571,6 +585,7 @@ class ManifestTest(unittest.TestCase):
|
||||
refs=[],
|
||||
sources=[],
|
||||
metrics=[],
|
||||
entities=[],
|
||||
depends_on=DependsOn(),
|
||||
config=self.model_config,
|
||||
tags=[],
|
||||
@@ -895,6 +910,7 @@ class TestManifestSearch(unittest.TestCase):
|
||||
files={},
|
||||
exposures={},
|
||||
metrics={},
|
||||
entities={},
|
||||
selectors={},
|
||||
)
|
||||
|
||||
@@ -917,6 +933,7 @@ def make_manifest(nodes=[], sources=[], macros=[], docs=[]):
|
||||
files={},
|
||||
exposures={},
|
||||
metrics={},
|
||||
entities={},
|
||||
selectors={},
|
||||
)
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ node_type_pluralizations = {
|
||||
NodeType.Macro: "macros",
|
||||
NodeType.Exposure: "exposures",
|
||||
NodeType.Metric: "metrics",
|
||||
NodeType.Entity: "entities",
|
||||
NodeType.Group: "groups",
|
||||
}
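Registering NodeType.Entity in node_type_pluralizations is what gives entities their own top-level key ("entities") wherever node types are turned into keyed collections, such as the flat graph assertions earlier in these tests. A small illustrative use, assuming the mapping defined above is in scope:

from dbt.node_types import NodeType

assert node_type_pluralizations[NodeType.Entity] == "entities"
# e.g. seeding one empty bucket per node type, as a flat-graph-like structure
buckets = {plural: {} for plural in node_type_pluralizations.values()}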
|
||||
|
||||
|
||||
testing-project/postgres/.gitignore (vendored, new file, 4 lines)
@@ -0,0 +1,4 @@
|
||||
|
||||
target/
|
||||
dbt_packages/
|
||||
logs/
|
||||
testing-project/postgres/dbt_project.yml (new file, 32 lines)
@@ -0,0 +1,32 @@
|
||||
# Name your project! Project names should contain only lowercase characters
|
||||
# and underscores. A good package name should reflect your organization's
|
||||
# name or the intended use of these models
|
||||
name: 'postgres'
|
||||
version: '1.0.0'
|
||||
config-version: 2
|
||||
|
||||
# This setting configures which "profile" dbt uses for this project.
|
||||
profile: 'user'
|
||||
|
||||
# These configurations specify where dbt should look for different types of files.
|
||||
# The `model-paths` config, for example, states that models in this project can be
|
||||
# found in the "models/" directory. You probably won't need to change these!
|
||||
model-paths: ["models"]
|
||||
analysis-paths: ["analyses"]
|
||||
test-paths: ["tests"]
|
||||
seed-paths: ["seeds"]
|
||||
macro-paths: ["macros"]
|
||||
snapshot-paths: ["snapshots"]
|
||||
|
||||
target-path: "target" # directory which will store compiled SQL files
|
||||
clean-targets: # directories to be removed by `dbt clean`
|
||||
- "target"
|
||||
- "dbt_packages"
|
||||
|
||||
|
||||
# Configuring models
|
||||
# Full documentation: https://docs.getdbt.com/docs/configuring-models
|
||||
|
||||
# In this example config, we tell dbt to build all models in the example/
|
||||
# directory as views. These settings can be overridden in the individual model
|
||||
# files using the `{{ config(...) }}` macro.
|
||||
@@ -0,0 +1,21 @@
|
||||
with orders as (
|
||||
|
||||
select * from {{ ref('fact_orders') }}
|
||||
|
||||
)
|
||||
,
|
||||
customers as (
|
||||
|
||||
select * from {{ ref('dim_customers') }}
|
||||
|
||||
)
|
||||
,
|
||||
final as (
|
||||
|
||||
select *
|
||||
from orders
|
||||
left join customers using (customer_id)
|
||||
|
||||
)
|
||||
|
||||
select * from final
|
||||
testing-project/postgres/models/dim_customers.sql (new file, 1 line)
@@ -0,0 +1 @@
|
||||
select * from {{ref('dim_customers_source')}}
|
||||
testing-project/postgres/models/dim_customers.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
version: 2
|
||||
models:
|
||||
- name: dim_customers
|
||||
columns:
|
||||
- name: customer_id
|
||||
description: TBD
|
||||
|
||||
- name: first_name
|
||||
description: TBD
|
||||
|
||||
- name: last_name
|
||||
description: TBD
|
||||
|
||||
- name: email
|
||||
description: TBD
|
||||
|
||||
- name: gender
|
||||
description: TBD
|
||||
|
||||
- name: is_new_customer
|
||||
description: TBD
|
||||
testing-project/postgres/models/entities/customers.yml (new file, 23 lines)
@@ -0,0 +1,23 @@
|
||||
version: 2
|
||||
entities:
|
||||
|
||||
- name: customers
|
||||
model: ref('dim_customers')
|
||||
description: "Our customers entity"
|
||||
|
||||
identifiers:
|
||||
- name: customer
|
||||
type: primary
|
||||
expr: customer_id
|
||||
|
||||
dimensions:
|
||||
- name: first_name
|
||||
type: categorical
|
||||
|
||||
|
||||
# description: string
|
||||
# column_name: first_name
|
||||
# date_type: string
|
||||
# default_timestamp: true
|
||||
# primary_key: true
|
||||
# time_grains: [day, week, month]
|
||||
testing-project/postgres/models/entities/exposure.yml (new file, 20 lines)
@@ -0,0 +1,20 @@
|
||||
version: 2
|
||||
|
||||
exposures:
|
||||
- name: weekly_metrics
|
||||
label: Some Label
|
||||
type: dashboard
|
||||
maturity: high
|
||||
url: https://bi.tool/dashboards/1
|
||||
description: >
|
||||
Did someone say "exponential growth"?
|
||||
|
||||
depends_on:
|
||||
- ref('fact_orders')
|
||||
- ref('dim_customers')
|
||||
# - metric('revenue')
|
||||
- entity('orders')
|
||||
|
||||
owner:
|
||||
name: Callum McData
|
||||
email: data@jaffleshop.com
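This exposure exercises the new entity('orders') dependency syntax. Based on the fixtures elsewhere in this change (unique ids such as entity.test.my_entity), the reference presumably resolves to an entity unique_id of the form entity.<package>.<name>; the helper below is purely illustrative, not dbt's actual resolution logic:

def entity_unique_id(package_name: str, entity_name: str) -> str:
    # Assumed naming convention, mirroring metric.<package>.<name>.
    return f"entity.{package_name}.{entity_name}"

assert entity_unique_id("postgres", "orders") == "entity.postgres.orders"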
|
||||
testing-project/postgres/models/entities/orders.yml (new file, 110 lines)
@@ -0,0 +1,110 @@
|
||||
version: 2
|
||||
|
||||
entities:
|
||||
- name: orders
|
||||
model: ref('fact_orders')
|
||||
description: "Our orders entity"
|
||||
identifiers:
|
||||
- name: order_id
|
||||
type: primary
|
||||
- name: customer
|
||||
type: foreign
|
||||
expr: customer_id
|
||||
dimensions:
|
||||
- name: order_date
|
||||
type: time
|
||||
type_params:
|
||||
time_granularity: day
|
||||
|
||||
- name: order_date_one
|
||||
type: time
|
||||
expr: order_date
|
||||
type_params:
|
||||
is_primary: true
|
||||
time_granularity: day
|
||||
|
||||
- name: order_location
|
||||
type: categorical
|
||||
expr: order_country
|
||||
|
||||
|
||||
measures:
|
||||
- name: order_total
|
||||
description: "The total value of the order"
|
||||
agg: sum
|
||||
|
||||
- name: sales
|
||||
description: "The total sale of the order"
|
||||
agg: sum
|
||||
expr: order_total
|
||||
create_metric: True
|
||||
|
||||
- name: median_sales
|
||||
description: "The median sale of the order"
|
||||
agg: median
|
||||
expr: order_total
|
||||
create_metric: True
|
||||
|
||||
- name: testing_count
|
||||
description: "Testing count functionality"
|
||||
agg: count
|
||||
expr: order_id
|
||||
create_metric: True
|
||||
|
||||
metrics:
|
||||
- name: revenue
|
||||
entity: entity('orders')
|
||||
description: "some description"
|
||||
type: measure_proxy
|
||||
type_params:
|
||||
measure: order_total
|
||||
|
||||
- name: testing_metric_constraint
|
||||
entity: entity('orders')
|
||||
description: "some description"
|
||||
type: measure_proxy
|
||||
type_params:
|
||||
measure: order_total
|
||||
constraint: |
|
||||
order_location = 'Unovo'
|
||||
|
||||
- name: sales_minus_revenue
|
||||
type: derived
|
||||
type_params:
|
||||
expr: sales - revenue
|
||||
metrics:
|
||||
- sales
|
||||
- revenue
|
||||
|
||||
- name: constraint_derived
|
||||
type: derived
|
||||
type_params:
|
||||
expr: sales - revenue
|
||||
metrics:
|
||||
- sales
|
||||
- name: revenue
|
||||
constraint: |
|
||||
order_location = 'Unovo'
|
||||
|
||||
- name: cancellation_rate
|
||||
type: ratio
|
||||
entity: entity('orders')
|
||||
type_params:
|
||||
numerator: order_total
|
||||
denominator: median_sales
|
||||
|
||||
- name: wau_rolling_7
|
||||
type: cumulative
|
||||
entity: entity('orders')
|
||||
type_params:
|
||||
measure: order_total
|
||||
window: 7 days
|
||||
|
||||
- name: revenue_usd
|
||||
type: expr
|
||||
entity: entity('orders')
|
||||
type_params:
|
||||
expr: order_total - sales
|
||||
measures:
|
||||
- order_total
|
||||
- sales
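Most of the metrics above proxy a single measure; sales_minus_revenue and constraint_derived are derived metrics whose type_params.expr is evaluated over the referenced metrics, and cancellation_rate is a ratio of two values. A toy illustration of the derived expression with arbitrary per-period values (the numbers are made up, not taken from the seeds):

period = {"sales": 12.0, "revenue": 10.0}
sales_minus_revenue = period["sales"] - period["revenue"]  # expr: sales - revenue
assert sales_minus_revenue == 2.0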
|
||||
testing-project/postgres/models/fact_orders.sql (new file, 9 lines)
@@ -0,0 +1,9 @@
|
||||
select
|
||||
order_id
|
||||
,order_country
|
||||
,order_total
|
||||
,had_discount
|
||||
,customer_id
|
||||
,to_date(order_date,'MM/DD/YYYY') as order_date
|
||||
,round(order_total - (order_total/2)) as discount_total
|
||||
from {{ref('fact_orders_source')}}
|
||||
testing-project/postgres/models/fact_orders.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
version: 2
|
||||
models:
|
||||
- name: fact_orders
|
||||
columns:
|
||||
- name: order_id
|
||||
description: TBD
|
||||
|
||||
- name: order_country
|
||||
description: TBD
|
||||
|
||||
- name: order_total
|
||||
description: TBD
|
||||
|
||||
- name: had_discount
|
||||
description: TBD
|
||||
|
||||
- name: customer_id
|
||||
description: TBD
|
||||
|
||||
- name: order_date
|
||||
description: TBD
|
||||
testing-project/postgres/models/values_testing.sql (new file, 7 lines)
@@ -0,0 +1,7 @@
|
||||
select 1 as test
|
||||
{# {% set mf = modules.mf %} #}
|
||||
{# {% set explain_plan = mf.explain(metrics=["sales"]) %} #}
|
||||
{# {% set explain_plan = mf.explain(self,metrics=["sales"]) %} #}
|
||||
|
||||
|
||||
{# select "{{manifest}}" as explain_plan #}
|
||||
testing-project/postgres/seeds/dim_customers_source.csv (new file, 6 lines)
@@ -0,0 +1,6 @@
|
||||
customer_id,first_name,last_name,email,gender,is_new_customer,date_added
|
||||
1,Geodude,Pokemon,rocks@pokemon.org,Male,FALSE,2022-01-01
|
||||
2,Mew,Pokemon,mew.is.the.best@pokemon.com,Genderfluid,TRUE,2022-01-06
|
||||
3,Mewtwo,Pokemon,no.mewtwo.is.better@pokemon.com,Genderqueer,FALSE,2022-01-13
|
||||
4,Charizard,Pokemon,firebreathbaby@pokemon.com,Female,TRUE,2022-02-01
|
||||
5,Snorlax,Pokemon,sleep@pokemon.com,Male,TRUE,2022-02-03
|
||||
|
testing-project/postgres/seeds/fact_orders_source.csv (new file, 11 lines)
@@ -0,0 +1,11 @@
|
||||
order_id,order_country,order_total,had_discount,customer_id,order_date
|
||||
1,Unovo,2,false,1,01/28/2022
|
||||
2,Kalos,1,false,2,01/20/2022
|
||||
3,Kalos,1,false,1,01/13/2022
|
||||
4,Alola,1,true,3,01/06/2022
|
||||
5,Alola,1,false,4,01/08/2022
|
||||
6,Kanto,1,false,5,01/21/2022
|
||||
7,Alola,1,true,2,01/22/2022
|
||||
8,Kanto,0,true,1,02/15/2022
|
||||
9,Unovo,1,false,2,02/03/2022
|
||||
10,Kanto,1,false,3,02/13/2022
|
||||
|
@@ -265,6 +265,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"unique_id": "model.test.model",
|
||||
"fqn": ["test", "model"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"tags": [],
|
||||
"meta": {},
|
||||
"config": model_config,
|
||||
@@ -359,6 +360,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"unique_id": "model.test.second_model",
|
||||
"fqn": ["test", "second_model"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"tags": [],
|
||||
"meta": {},
|
||||
"config": second_config,
|
||||
@@ -535,6 +537,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"file_key_name": "models.model",
|
||||
"fqn": ["test", "not_null_model_id"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "not_null_model_id",
|
||||
"original_file_path": model_schema_yml_path,
|
||||
"package_name": "test",
|
||||
@@ -591,6 +594,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"extra_ctes_injected": True,
|
||||
"fqn": ["test", "snapshot_seed", "snapshot_seed"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"meta": {},
|
||||
"name": "snapshot_seed",
|
||||
"original_file_path": snapshot_path,
|
||||
@@ -637,6 +641,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"file_key_name": "models.model",
|
||||
"fqn": ["test", "test_nothing_model_"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "test_nothing_model_",
|
||||
"original_file_path": model_schema_yml_path,
|
||||
"package_name": "test",
|
||||
@@ -690,6 +695,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"file_key_name": "models.model",
|
||||
"fqn": ["test", "unique_model_id"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "unique_model_id",
|
||||
"original_file_path": model_schema_yml_path,
|
||||
"package_name": "test",
|
||||
@@ -793,6 +799,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"maturity": "medium",
|
||||
"meta": {"tool": "my_tool", "languages": ["python"]},
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"tags": ["my_department"],
|
||||
"name": "notebook_exposure",
|
||||
"original_file_path": os.path.join("models", "schema.yml"),
|
||||
@@ -820,6 +827,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
},
|
||||
"fqn": ["test", "simple_exposure"],
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "simple_exposure",
|
||||
"original_file_path": os.path.join("models", "schema.yml"),
|
||||
"owner": {
|
||||
@@ -841,6 +849,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
},
|
||||
},
|
||||
"metrics": {},
|
||||
"entities": {},
|
||||
"groups": {},
|
||||
"selectors": {},
|
||||
"parent_map": {
|
||||
@@ -926,6 +935,7 @@ def expected_references_manifest(project):
|
||||
"fqn": ["test", "ephemeral_copy"],
|
||||
"group": None,
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "ephemeral_copy",
|
||||
"original_file_path": ephemeral_copy_path,
|
||||
"package_name": "test",
|
||||
@@ -987,6 +997,7 @@ def expected_references_manifest(project):
|
||||
"fqn": ["test", "ephemeral_summary"],
|
||||
"group": "test_group",
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "ephemeral_summary",
|
||||
"original_file_path": ephemeral_summary_path,
|
||||
"package_name": "test",
|
||||
@@ -1051,6 +1062,7 @@ def expected_references_manifest(project):
|
||||
"fqn": ["test", "view_summary"],
|
||||
"group": None,
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"name": "view_summary",
|
||||
"original_file_path": view_summary_path,
|
||||
"package_name": "test",
|
||||
@@ -1175,6 +1187,7 @@ def expected_references_manifest(project):
|
||||
"fqn": ["test", "snapshot_seed", "snapshot_seed"],
|
||||
"group": None,
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"meta": {},
|
||||
"name": "snapshot_seed",
|
||||
"original_file_path": snapshot_path,
|
||||
@@ -1263,6 +1276,7 @@ def expected_references_manifest(project):
|
||||
"maturity": "medium",
|
||||
"meta": {"tool": "my_tool", "languages": ["python"]},
|
||||
"metrics": [],
|
||||
"entities": [],
|
||||
"tags": ["my_department"],
|
||||
"name": "notebook_exposure",
|
||||
"original_file_path": os.path.join("models", "schema.yml"),
|
||||
@@ -1279,6 +1293,7 @@ def expected_references_manifest(project):
|
||||
},
|
||||
},
|
||||
"metrics": {},
|
||||
"entities": {},
|
||||
"groups": {
|
||||
"group.test.test_group": {
|
||||
"name": "test_group",
|
||||
|
||||
@@ -362,6 +362,7 @@ def verify_manifest(project, expected_manifest, start_time, manifest_schema_path
|
||||
"child_map",
|
||||
"group_map",
|
||||
"metrics",
|
||||
"entities",
|
||||
"groups",
|
||||
"docs",
|
||||
"metadata",
|
||||
@@ -389,7 +390,7 @@ def verify_manifest(project, expected_manifest, start_time, manifest_schema_path
|
||||
and metadata["send_anonymous_usage_stats"] is False
|
||||
)
|
||||
assert "adapter_type" in metadata and metadata["adapter_type"] == project.adapter_type
|
||||
elif key in ["nodes", "sources", "exposures", "metrics", "disabled", "docs"]:
|
||||
elif key in ["nodes", "sources", "exposures", "metrics", "entities", "disabled", "docs"]:
|
||||
for unique_id, node in expected_manifest[key].items():
|
||||
assert unique_id in manifest[key]
|
||||
assert manifest[key][unique_id] == node, f"{unique_id} did not match"
|
||||
|
||||
@@ -318,10 +318,10 @@ class TestPreviousVersionState:
|
||||
|
||||
def test_backwards_compatible_versions(self, project):
|
||||
# manifest schema version 8 and greater should always be forward compatible
|
||||
for schema_version in range(4, self.CURRENT_EXPECTED_MANIFEST_VERSION):
|
||||
for schema_version in range(8, self.CURRENT_EXPECTED_MANIFEST_VERSION):
|
||||
self.compare_previous_state(project, schema_version, True)
|
||||
|
||||
def test_nonbackwards_compatible_versions(self, project):
|
||||
# schema versions 1 through 6 are not forward compatible
|
||||
for schema_version in range(1, 4):
|
||||
for schema_version in range(1, 7):
|
||||
self.compare_previous_state(project, schema_version, False)
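Because Python's range excludes its upper bound, the new loop bounds mean schema versions 8 through CURRENT_EXPECTED_MANIFEST_VERSION - 1 are expected to load, while versions 1 through 6 are expected to fail. A quick illustration, assuming a current expected version of 12 (the real value is defined on the test class):

CURRENT_EXPECTED_MANIFEST_VERSION = 12  # assumed for illustration

assert list(range(8, CURRENT_EXPECTED_MANIFEST_VERSION)) == [8, 9, 10, 11]
assert list(range(1, 7)) == [1, 2, 3, 4, 5, 6]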
|
||||
|
||||