Compare commits

...

45 Commits

Author SHA1 Message Date
Callum McCann
ec1c9f0362 fixing metricflow not being present 2023-03-13 17:34:02 -07:00
Callum McCann
caeecf4a67 switching to postgres 2023-03-09 10:29:27 -08:00
Callum McCann
fcea5969ae adding metricflow as context macro library 2023-03-07 09:12:27 -08:00
Callum McCann
672a7d7fc8 straight dbt query 2023-03-03 08:43:43 -08:00
Callum McCann
a033aa0180 more updates for mypy 2023-03-02 15:19:41 -08:00
Callum McCann
38991cd3a9 updates for mypy 2023-02-28 13:14:10 -08:00
Callum McCann
4f966b0d54 fixes for precommit 2023-02-28 10:20:10 -08:00
Callum McCann
d3eaa37212 pre-commit fixes 2023-02-28 09:41:56 -08:00
Callum McCann
ce1759f793 fixing metric input 2023-02-28 09:37:01 -08:00
Callum McCann
4b3e797530 cli embedding and moving UCM 2023-02-27 15:43:23 -08:00
Callum McCann
3e9ed1ff9b Merge branch 'main' into merging-main 2023-02-27 11:58:16 -08:00
Callum McCann
9b0dce32b8 testing metricflow 2023-02-27 11:42:45 -08:00
Callum McCann
ce8e886f38 changing to snowflake for mf compatability 2023-02-26 14:31:31 -08:00
Callum McCann
12d02dc9d1 allowing hashable metric inputs 2023-02-26 14:24:04 -08:00
Callum McCann
72f59da9df renaming element name to name 2023-02-23 16:58:30 -06:00
Callum McCann
0dd99eac83 renaming element_name to name 2023-02-23 16:58:21 -06:00
Callum McCann
f0564f9d32 finally got constraints working 2023-02-23 14:28:28 -06:00
Callum McCann
278e4c7673 adding constraints but they do not work yet 2023-02-22 15:04:17 -06:00
Callum McCann
e3ec07d035 removing unused imports 2023-02-21 10:14:41 -06:00
Callum McCann
c7aa2ed7ef moving UCM 2023-02-21 09:55:11 -06:00
Callum McCann
e49e259950 Fixing imports+adding ModelValidation to manifest 2023-02-20 16:36:00 -06:00
Callum McCann
140597276b Renaming to aggregation state 2023-02-20 14:56:26 -06:00
Callum McCann
6712a5841a adding constraint and renaming agg file 2023-02-20 14:52:14 -06:00
Callum McCann
f4356d8dd2 adding entity origin 2023-02-17 15:08:37 -06:00
Callum McCann
7c715c5625 renaming folder 2023-02-17 14:47:01 -06:00
Callum McCann
5b9a24fd23 moving objects into contract graphs 2023-02-17 14:44:57 -06:00
Callum McCann
6378c13e7a fixing some errors on lookup 2023-02-17 13:24:59 -06:00
Callum McCann
2db94c5788 adding warning messages 2023-02-17 08:52:37 -06:00
Callum McCann
f25c8f39fc adding model as property 2023-02-17 08:34:53 -06:00
Callum McCann
3b8b191623 whitespace 2023-02-16 19:21:10 -06:00
Callum McCann
246fd66e8e finalizing validation 2023-02-16 19:05:12 -06:00
Callum McCann
817d39ac14 adding more rules :dance: 2023-02-16 16:21:04 -06:00
Callum McCann
85e27ac747 adding model validator and first 2 rules 2023-02-16 15:15:39 -06:00
Callum McCann
b5ca2e4c5f validation for unique names per entity 2023-02-15 16:20:51 -06:00
Callum McCann
e69b465c41 adding validation for metrics 2023-02-15 13:04:18 -06:00
Callum McCann
6937b321d6 fixing metric reference 2023-02-15 09:16:24 -06:00
Callum McCann
a6fc443abc adding user config model to manifest 2023-02-15 09:03:14 -06:00
Callum McCann
340cae3b43 moving semantics out 2023-02-14 13:03:18 -06:00
Callum McCann
91c5e2cc86 renaming attributes 2023-02-13 16:01:55 -06:00
Callum McCann
bafae0326b folder change and renaming back to expr 2023-02-13 08:49:44 -06:00
Callum McCann
7e1b788bd8 adding primary time aggregation rule 2023-02-11 21:40:40 -06:00
Callum McCann
1bd2fe09a1 renaming to transform model and adding boolean 2023-02-11 21:34:23 -06:00
Callum McCann
5b0197635d second commit - unclean commit sorry future me 2023-02-11 21:24:42 -06:00
Callum McCann
c1ad7b0f0e first commit - adding base constructs/classes 2023-02-10 08:45:15 -06:00
Callum McCann
2da925aa25 Adding entity node to core (#6648)
* first draft

* finishing first commit

* adding testing project

* adding changie

* cleaning

* removing blocks

* fixing proto error message

* updates to events

* fixing issues

* adding test dimension

* updating schemas

* updating manfiest.json

* removing old versions from compatability

* updating

* fixes

* fixing more bugs caught by tests

* updating tests
2023-01-31 09:03:06 -06:00
88 changed files with 6195 additions and 19101 deletions

View File

@@ -0,0 +1,6 @@
kind: Features
body: Adding the entity node
time: 2023-01-18T13:48:04.487817-06:00
custom:
Author: callum-mcdata
Issue: "6627"

.gitignore vendored
View File

@@ -9,6 +9,7 @@ __pycache__/
# Distribution / packaging
.Python
env*/
.mf_dbt_venv
dbt_env/
build/
!core/dbt/docs/build

View File

@@ -22,6 +22,18 @@ from dbt.task.build import BuildTask
from dbt.task.generate import GenerateTask
from dbt.task.init import InitTask
import importlib
metricflow_module = importlib.util.find_spec("metricflow")
if metricflow_module is not None:
from metricflow.cli.main import (
list_metrics,
list_dimensions,
get_dimension_values,
query,
validate_configs
)
class dbtUsageException(Exception):
pass
@@ -160,6 +172,14 @@ def clean(ctx, **kwargs):
return results, success
# mf
@cli.group()
@click.pass_context
def mf(ctx, **kwargs):
"""Used to house the metricflow metrics"""
pass
# dbt docs
@cli.group()
@click.pass_context
@@ -572,6 +592,12 @@ def freshness(ctx, **kwargs):
snapshot_freshness = copy(cli.commands["source"].commands["freshness"]) # type: ignore
snapshot_freshness.hidden = True
cli.commands["source"].add_command(snapshot_freshness, "snapshot-freshness") # type: ignore
if metricflow_module is not None:
cli.add_command(list_metrics, "list-metrics")
cli.add_command(list_dimensions, "list-dimensions")
cli.add_command(get_dimension_values, "get-dimension-values")
cli.add_command(query, "query")
cli.add_command(validate_configs, "validate-configs")
# dbt test
@@ -612,6 +638,32 @@ def test(ctx, **kwargs):
return results, success
# dbt validate
# @cli.command("validate")
# @click.pass_context
# @p.args
# @p.profile
# @p.profiles_dir
# @p.project_dir
# @p.target
# @p.vars
# @requires.preflight
# @requires.profile
# @requires.project
# @requires.runtime_config
# @requires.manifest
# def validate(ctx, **kwargs):
# """Validates the semantic layer"""
# task = ValidateTask(
# ctx.obj["flags"],
# ctx.obj["runtime_config"],
# ctx.obj["manifest"],
# )
# results = task.run()
# success = task.interpret_results(results)
# return results, success
# Support running as a module
if __name__ == "__main__":
cli()
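
The hunk above only wires MetricFlow's CLI commands into the click group when the package can actually be found: importlib.util.find_spec probes for the module without importing it, and the extra commands are attached at the end of command registration. Below is a minimal, self-contained sketch of that optional-dependency pattern; the "extras" package and "extra_command" names are hypothetical stand-ins, not part of dbt or MetricFlow.

import importlib.util
import click

# Probe for the optional package without importing it; find_spec returns None when absent.
extras_spec = importlib.util.find_spec("extras")

@click.group()
def cli():
    """Root command group; core commands are always registered."""

@cli.command("hello")
def hello():
    """A command that exists regardless of optional dependencies."""
    click.echo("hello")

if extras_spec is not None:
    # Only import (and expose) the optional commands when the package is installed.
    from extras.cli import extra_command  # hypothetical module and command
    cli.add_command(extra_command, "extra-command")

if __name__ == "__main__":
    cli()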

View File

@@ -37,7 +37,7 @@ cache_selected_only = click.option(
compile_docs = click.option(
"--compile/--no-compile",
envvar=None,
help="Wether or not to run 'dbt compile' as part of docs generation",
help="Whether or not to run 'dbt compile' as part of docs generation",
default=True,
)

View File

@@ -50,6 +50,7 @@ def print_compile_stats(stats):
NodeType.Source: "source",
NodeType.Exposure: "exposure",
NodeType.Metric: "metric",
NodeType.Entity: "entity",
NodeType.Group: "group",
}
@@ -86,6 +87,8 @@ def _generate_stats(manifest: Manifest):
stats[exposure.resource_type] += 1
for metric in manifest.metrics.values():
stats[metric.resource_type] += 1
for entity in manifest.entities.values():
stats[entity.resource_type] += 1
for macro in manifest.macros.values():
stats[macro.resource_type] += 1
for group in manifest.groups.values():
@@ -402,6 +405,8 @@ class Compiler:
linker.dependency(node.unique_id, (manifest.sources[dependency].unique_id))
elif dependency in manifest.metrics:
linker.dependency(node.unique_id, (manifest.metrics[dependency].unique_id))
elif dependency in manifest.entities:
linker.dependency(node.unique_id, (manifest.entities[dependency].unique_id))
else:
raise GraphDependencyNotFoundError(node, dependency)
@@ -414,6 +419,8 @@ class Compiler:
self.link_node(linker, exposure, manifest)
for metric in manifest.metrics.values():
self.link_node(linker, metric, manifest)
for entity in manifest.entities.values():
self.link_node(linker, entity, manifest)
cycle = linker.find_cycles()
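
The compiler changes above make entities participate in graph linking the same way metrics do: each dependency id on a node is looked up in the manifest's collections in turn, an edge is recorded when it is found, and an unknown dependency raises. A toy sketch of that dispatch, assuming simplified stand-ins for the linker and manifest rather than dbt's real classes:

from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple

@dataclass
class ToyLinker:
    # Each edge records that node_id depends on depends_on_id.
    edges: Set[Tuple[str, str]] = field(default_factory=set)

    def dependency(self, node_id: str, depends_on_id: str) -> None:
        self.edges.add((node_id, depends_on_id))

def link_node(linker: ToyLinker, node_id: str, dependencies: List[str],
              manifest: Dict[str, Dict[str, str]]) -> None:
    # Try each collection in turn, the way the compiler checks nodes, sources,
    # metrics and (now) entities before giving up.
    for dep in dependencies:
        for collection in ("nodes", "sources", "metrics", "entities"):
            if dep in manifest[collection]:
                linker.dependency(node_id, manifest[collection][dep])
                break
        else:
            raise KeyError(f"dependency {dep} not found in manifest")

# Usage: a dependency on an entity now produces an edge like any other resource type.
manifest = {
    "nodes": {}, "sources": {},
    "metrics": {"metric.proj.revenue": "metric.proj.revenue"},
    "entities": {"entity.proj.orders": "entity.proj.orders"},
}
linker = ToyLinker()
link_node(linker, "metric.proj.revenue", ["entity.proj.orders"], manifest)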

View File

@@ -394,6 +394,7 @@ class PartialProject(RenderComponents):
sources: Dict[str, Any]
tests: Dict[str, Any]
metrics: Dict[str, Any]
entities: Dict[str, Any]
exposures: Dict[str, Any]
vars_value: VarProvider
@@ -404,6 +405,7 @@ class PartialProject(RenderComponents):
sources = cfg.sources
tests = cfg.tests
metrics = cfg.metrics
entities = cfg.entities
exposures = cfg.exposures
if cfg.vars is None:
vars_dict: Dict[str, Any] = {}
@@ -459,6 +461,7 @@ class PartialProject(RenderComponents):
sources=sources,
tests=tests,
metrics=metrics,
entities=entities,
exposures=exposures,
vars=vars_value,
config_version=cfg.config_version,
@@ -563,6 +566,7 @@ class Project:
sources: Dict[str, Any]
tests: Dict[str, Any]
metrics: Dict[str, Any]
entities: Dict[str, Any]
exposures: Dict[str, Any]
vars: VarProvider
dbt_version: List[VersionSpecifier]
@@ -637,6 +641,7 @@ class Project:
"sources": self.sources,
"tests": self.tests,
"metrics": self.metrics,
"entities": self.entities,
"exposures": self.exposures,
"vars": self.vars.to_dict(),
"require-dbt-version": [v.to_version_string() for v in self.dbt_version],

View File

@@ -165,6 +165,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
sources=project.sources,
tests=project.tests,
metrics=project.metrics,
entities=project.entities,
exposures=project.exposures,
vars=project.vars,
config_version=project.config_version,
@@ -314,6 +315,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
"sources": self._get_config_paths(self.sources),
"tests": self._get_config_paths(self.tests),
"metrics": self._get_config_paths(self.metrics),
"entities": self._get_config_paths(self.entities),
"exposures": self._get_config_paths(self.exposures),
}

View File

@@ -30,6 +30,9 @@ import datetime
import re
import itertools
import importlib
metricflow_module = importlib.util.find_spec("metricflow")
# See the `contexts` module README for more information on how contexts work
@@ -51,6 +54,11 @@ def get_re_module_context() -> Dict[str, Any]:
return {name: getattr(re, name) for name in context_exports}
if metricflow_module is not None:
def get_metricflow_module_context() -> Dict[str, Any]:
from metricflow.api.metricflow_client import MetricFlowClient
context_exports = ["explain"]
return {name: getattr(MetricFlowClient, name) for name in context_exports}
def get_itertools_module_context() -> Dict[str, Any]:
# Excluded dropwhile, filterfalse, takewhile and groupby;
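
The conditional MetricFlow context above follows the same shape as the existing module contexts: an explicit whitelist of attribute names is turned into a plain dict via getattr, so templates only see the exports that were deliberately listed. A small standalone sketch of that pattern, using the standard-library math module in place of MetricFlowClient:

import math
from typing import Any, Dict

def get_math_module_context() -> Dict[str, Any]:
    # Expose only an explicit whitelist of names to the template context.
    context_exports = ["sqrt", "floor", "ceil", "pi"]
    return {name: getattr(math, name) for name in context_exports}

# Usage: the resulting dict can be merged into a Jinja rendering context.
modules_context = {"math": get_math_module_context()}
assert modules_context["math"]["sqrt"](16) == 4.0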

View File

@@ -45,6 +45,8 @@ class UnrenderedConfig(ConfigSource):
model_configs = unrendered.get("tests")
elif resource_type == NodeType.Metric:
model_configs = unrendered.get("metrics")
elif resource_type == NodeType.Entity:
model_configs = unrendered.get("entities")
elif resource_type == NodeType.Exposure:
model_configs = unrendered.get("exposures")
else:
@@ -70,6 +72,8 @@ class RenderedConfig(ConfigSource):
model_configs = self.project.tests
elif resource_type == NodeType.Metric:
model_configs = self.project.metrics
elif resource_type == NodeType.Entity:
model_configs = self.project.entities
elif resource_type == NodeType.Exposure:
model_configs = self.project.exposures
else:

View File

@@ -33,12 +33,14 @@ from dbt.contracts.graph.nodes import (
Macro,
Exposure,
Metric,
Entity,
SeedNode,
SourceDefinition,
Resource,
ManifestNode,
)
from dbt.contracts.graph.metrics import MetricReference, ResolvedMetricReference
from dbt.contracts.graph.metrics import MetricReference
from dbt.contracts.graph.entities import EntityReference, ResolvedEntityReference
from dbt.events.functions import get_metadata_vars
from dbt.exceptions import (
CompilationError,
@@ -54,6 +56,7 @@ from dbt.exceptions import (
MacroDispatchArgError,
MacrosSourcesUnWriteableError,
MetricArgsError,
EntityArgsError,
MissingConfigError,
OperationsCannotRefEphemeralNodesError,
PackageNotInDepsError,
@@ -206,7 +209,7 @@ class BaseResolver(metaclass=abc.ABCMeta):
return self.db_wrapper.Relation
@abc.abstractmethod
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference]:
def __call__(self, *args: str) -> Union[str, RelationProxy, MetricReference, EntityReference]:
pass
@@ -305,6 +308,41 @@ class BaseMetricResolver(BaseResolver):
return self.resolve(name, package)
class BaseEntityResolver(BaseResolver):
def resolve(self, name: str, package: Optional[str] = None) -> EntityReference:
...
def _repack_args(self, name: str, package: Optional[str]) -> List[str]:
if package is None:
return [name]
else:
return [package, name]
def validate_args(self, name: str, package: Optional[str]):
if not isinstance(name, str):
raise CompilationError(
f"The name argument to entity() must be a string, got {type(name)}"
)
if package is not None and not isinstance(package, str):
raise CompilationError(
f"The package argument to entity() must be a string or None, got {type(package)}"
)
def __call__(self, *args: str) -> EntityReference:
name: str
package: Optional[str] = None
if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
raise EntityArgsError(node=self.model, args=args)
self.validate_args(name, package)
return self.resolve(name, package)
class Config(Protocol):
def __init__(self, model, context_config: Optional[ContextConfig]):
...
@@ -546,23 +584,38 @@ class ParseMetricResolver(BaseMetricResolver):
class RuntimeMetricResolver(BaseMetricResolver):
def resolve(self, target_name: str, target_package: Optional[str] = None) -> MetricReference:
target_metric = self.manifest.resolve_metric(
def resolve(self, name: str, package: Optional[str] = None) -> MetricReference:
self.model.metrics.append(self._repack_args(name, package))
return MetricReference(name, package)
# `metric` implementations
class ParseEntityResolver(BaseEntityResolver):
def resolve(self, name: str, package: Optional[str] = None) -> EntityReference:
self.model.entities.append(self._repack_args(name, package))
return EntityReference(name, package)
class RuntimeEntityResolver(BaseEntityResolver):
def resolve(self, target_name: str, target_package: Optional[str] = None) -> EntityReference:
target_entity = self.manifest.resolve_entity(
target_name,
target_package,
self.current_project,
self.model.package_name,
)
if target_metric is None or isinstance(target_metric, Disabled):
if target_entity is None or isinstance(target_entity, Disabled):
raise TargetNotFoundError(
node=self.model,
target_name=target_name,
target_kind="metric",
target_kind="entity",
target_package=target_package,
)
return ResolvedMetricReference(target_metric, self.manifest, self.Relation)
return ResolvedEntityReference(target_entity, self.manifest, self.Relation)
# `var` implementations.
@@ -623,6 +676,7 @@ class Provider(Protocol):
ref: Type[BaseRefResolver]
source: Type[BaseSourceResolver]
metric: Type[BaseMetricResolver]
entity: Type[BaseEntityResolver]
class ParseProvider(Provider):
@@ -633,6 +687,7 @@ class ParseProvider(Provider):
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver
entity = ParseEntityResolver
class GenerateNameProvider(Provider):
@@ -643,6 +698,7 @@ class GenerateNameProvider(Provider):
ref = ParseRefResolver
source = ParseSourceResolver
metric = ParseMetricResolver
entity = ParseEntityResolver
class RuntimeProvider(Provider):
@@ -653,6 +709,7 @@ class RuntimeProvider(Provider):
ref = RuntimeRefResolver
source = RuntimeSourceResolver
metric = RuntimeMetricResolver
entity = RuntimeEntityResolver
class OperationProvider(RuntimeProvider):
@@ -847,6 +904,10 @@ class ProviderContext(ManifestContext):
def metric(self) -> Callable:
return self.provider.metric(self.db_wrapper, self.model, self.config, self.manifest)
@contextproperty
def entity(self) -> Callable:
return self.provider.entity(self.db_wrapper, self.model, self.config, self.manifest)
@contextproperty("config")
def ctx_config(self) -> Config:
"""The `config` variable exists to handle end-user configuration for
@@ -1431,6 +1492,14 @@ class ExposureMetricResolver(BaseResolver):
return ""
class ExposureEntityResolver(BaseResolver):
def __call__(self, *args) -> str:
if len(args) not in (1, 2):
raise EntityArgsError(node=self.model, args=args)
self.model.entities.append(list(args))
return ""
def generate_parse_exposure(
exposure: Exposure,
config: RuntimeConfig,
@@ -1457,6 +1526,12 @@ def generate_parse_exposure(
project,
manifest,
),
"entity": ExposureEntityResolver(
None,
exposure,
project,
manifest,
),
}
@@ -1501,6 +1576,57 @@ def generate_parse_metrics(
project,
manifest,
),
"entity": ParseEntityResolver(
None,
metric,
project,
manifest,
),
}
class EntityRefResolver(BaseResolver):
def __call__(self, *args) -> str:
package = None
if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
raise RefArgsError(node=self.model, args=args)
self.validate_args(name, package)
self.model.refs.append(list(args))
return ""
def validate_args(self, name, package):
if not isinstance(name, str):
raise ParsingError(
f"In the entity associated with {self.model.original_file_path} "
"the name argument to ref() must be a string"
)
def generate_parse_entities(
entity: Entity,
config: RuntimeConfig,
manifest: Manifest,
package_name: str,
) -> Dict[str, Any]:
project = config.load_dependencies()[package_name]
return {
"ref": EntityRefResolver(
None,
entity,
project,
manifest,
),
# An entity cannot reference another entity so we comment out this section
# "entity": ParseEntityResolver(
# None,
# entity,
# project,
# manifest,
# ),
}
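
All of the resolver classes above share the same calling convention: entity('name') or entity('package_name', 'name'), with anything else rejected, and the parse-time resolvers re-pack the arguments onto the node so dependencies can be resolved later. A compact sketch of that argument handling, independent of dbt's resolver plumbing (a plain ValueError stands in for EntityArgsError and CompilationError):

from typing import List, Optional, Tuple

def parse_entity_args(*args: str) -> Tuple[str, Optional[str]]:
    # Mirrors the one- and two-argument forms accepted by the entity() resolver.
    package: Optional[str] = None
    if len(args) == 1:
        name = args[0]
    elif len(args) == 2:
        package, name = args
    else:
        raise ValueError(f"entity() takes 1 or 2 arguments, got {len(args)}")
    if not isinstance(name, str):
        raise ValueError(f"The name argument to entity() must be a string, got {type(name)}")
    if package is not None and not isinstance(package, str):
        raise ValueError(f"The package argument to entity() must be a string or None, got {type(package)}")
    return name, package

def repack_args(name: str, package: Optional[str]) -> List[str]:
    # Parse-time resolvers store this list on the node for later resolution.
    return [name] if package is None else [package, name]

# Usage:
assert parse_entity_args("orders") == ("orders", None)
assert repack_args("orders", "jaffle_shop") == ["jaffle_shop", "orders"]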

View File

@@ -227,6 +227,7 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
entities: List[str] = field(default_factory=list)
groups: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)

View File

@@ -0,0 +1,91 @@
from __future__ import annotations
from dbt.contracts.util import Mergeable
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List
from dbt.semantic.time import TimeGranularity
from dbt.semantic.references import DimensionReference, TimeDimensionReference
class DimensionType(StrEnum):
CATEGORICAL = "categorical"
TIME = "time"
def is_time_type(self) -> bool:
"""Checks if this type of dimension is a time type"""
return self in [DimensionType.TIME]
@dataclass
class DimensionValidityParams(dbtClassMixin, Mergeable):
"""Parameters identifying a given dimension as an identifier for validity state
This construct is used for supporting SCD Type II tables, such as might be
created via dbt's snapshot feature, or generated via periodic loads from external
dimension data sources. In either of those cases, there is typically a time dimension
associated with the SCD data source that indicates the start and end times of a
validity window, where the dimension value is valid for any time within that range.
"""
is_start: bool = False
is_end: bool = False
@dataclass
class DimensionTypeParameters(dbtClassMixin, Mergeable):
"""This class contains the type parameters required for the semantic layer.
The first iteration of this is specifically focused on time.
Additionally we use the final two properties (start/end) for supporting SCD
Type II tables, such as might be created via dbt's snapshot feature, or generated
via periodic loads from external dimension data sources. In either of those cases,
there is typically a time dimension associated with the SCD data source that
indicates the start and end times of a validity window, where the dimension
value is valid for any time within that range.
TODO: Can we abstract from params and have these be first class??"""
time_granularity: TimeGranularity
is_primary: bool = False
validity_params: Optional[DimensionValidityParams] = None
@dataclass
class Dimension(dbtClassMixin, Mergeable):
"""Each instance of this class represents a dimension in the associated entity."""
name: str
type: DimensionType
type_params: Optional[DimensionTypeParameters] = None
expr: Optional[str] = None
is_partition: bool = False
description: str = ""
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
@property
def is_primary_time(self) -> bool: # noqa: D
if self.type == DimensionType.TIME and self.type_params is not None:
return self.type_params.is_primary
return False
@property
def reference(self) -> DimensionReference: # noqa: D
return DimensionReference(name=self.name)
@property
def time_dimension_reference(self) -> TimeDimensionReference: # noqa: D
assert (
self.type == DimensionType.TIME
), f"Got type as {self.type} instead of {DimensionType.TIME}"
return TimeDimensionReference(name=self.name)
@property
def validity_params(self) -> Optional[DimensionValidityParams]:
"""Returns the DimensionValidityParams property, if it exists.
This is to avoid repeatedly checking that type params is not None before doing anything with ValidityParams
"""
if self.type_params:
return self.type_params.validity_params
return None
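
A simplified, standalone illustration of how the pieces above fit together: a dimension only counts as the primary time dimension when it is a time dimension whose type params say so, and validity params are reached through the type params. The dataclasses below drop dbt's mixins and use a plain string in place of TimeGranularity, so they are an approximation of the contract, not the contract itself.

from dataclasses import dataclass
from enum import Enum
from typing import Optional

class ToyDimensionType(str, Enum):
    CATEGORICAL = "categorical"
    TIME = "time"

@dataclass
class ToyDimensionTypeParams:
    time_granularity: str            # stand-in for TimeGranularity
    is_primary: bool = False

@dataclass
class ToyDimension:
    name: str
    type: ToyDimensionType
    type_params: Optional[ToyDimensionTypeParams] = None

    @property
    def is_primary_time(self) -> bool:
        # Only time dimensions with explicitly primary type params qualify.
        if self.type == ToyDimensionType.TIME and self.type_params is not None:
            return self.type_params.is_primary
        return False

# Usage:
ds = ToyDimension(name="ds", type=ToyDimensionType.TIME,
                  type_params=ToyDimensionTypeParams(time_granularity="day", is_primary=True))
country = ToyDimension(name="country", type=ToyDimensionType.CATEGORICAL)
assert ds.is_primary_time and not country.is_primary_time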

View File

@@ -0,0 +1,72 @@
from dbt.contracts.util import Mergeable
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass
from typing import Optional
class EntityReference(object):
def __init__(self, entity_name, package_name=None):
self.entity_name = entity_name
self.package_name = package_name
def __str__(self):
return f"{self.entity_name}"
class ResolvedEntityReference(EntityReference):
"""
Simple proxy over an Entity which delegates property
lookups to the underlying node. Also adds helper functions
for working with entities (i.e. __str__ and templating functions)
"""
def __init__(self, node, manifest, Relation):
super().__init__(node.name, node.package_name)
self.node = node
self.manifest = manifest
self.Relation = Relation
def __getattr__(self, key):
return getattr(self.node, key)
def __str__(self):
return f"{self.node.name}"
class EntityMutabilityType(StrEnum):
"""How data at the physical layer is expected to behave"""
UNKNOWN = "UNKNOWN"
IMMUTABLE = "IMMUTABLE" # never changes
APPEND_ONLY = "APPEND_ONLY" # appends along an orderable column
DS_APPEND_ONLY = "DS_APPEND_ONLY" # appends along daily column
FULL_MUTATION = "FULL_MUTATION" # no guarantees, everything may change
@dataclass
class EntityMutabilityTypeParams(dbtClassMixin, Mergeable):
"""Type params add additional context to mutability"""
min: Optional[str] = None
max: Optional[str] = None
update_cron: Optional[str] = None
along: Optional[str] = None
@dataclass
class EntityMutability(dbtClassMixin):
"""Describes the mutability properties of a data source"""
type: EntityMutabilityType
type_params: Optional[EntityMutabilityTypeParams] = None
class EntityOrigin(StrEnum):
"""Describes how data sources were created
Impacts determination of validity and duration of storage
"""
SOURCE = "source" # "input" data sources
DERIVED = (
"derived" # generated by the semantic layer originating (perhaps indirectly) from sources
)
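
ResolvedEntityReference above is a thin proxy: anything not defined on the reference itself is forwarded to the wrapped node through __getattr__, which is what lets template code treat the reference like the entity it points at. A minimal sketch of that delegation pattern, using a stand-in node class:

class ToyNode:
    def __init__(self, name: str, package_name: str, description: str):
        self.name = name
        self.package_name = package_name
        self.description = description

class ToyResolvedReference:
    def __init__(self, node: ToyNode):
        self.node = node

    def __getattr__(self, key):
        # Invoked only when normal attribute lookup fails, so unknown
        # attributes fall through to the wrapped node.
        return getattr(self.node, key)

    def __str__(self) -> str:
        return self.node.name

# Usage:
ref = ToyResolvedReference(ToyNode("orders", "jaffle_shop", "Order grain entity"))
assert str(ref) == "orders"
assert ref.description == "Order grain entity"   # delegated to the node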

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
from dbt.contracts.util import (
Mergeable,
)
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from dbt.semantic.references import CompositeSubIdentifierReference, IdentifierReference
class IdentifierType(StrEnum):
"""Defines uniqueness and the extent to which an identifier represents the common entity for a data source"""
FOREIGN = "foreign"
NATURAL = "natural"
PRIMARY = "primary"
UNIQUE = "unique"
@dataclass
class CompositeSubIdentifier(dbtClassMixin):
"""CompositeSubIdentifiers either describe or reference the identifiers that comprise a composite identifier"""
name: Optional[str] = None
expr: Optional[str] = None
ref: Optional[str] = None
@property
def reference(self) -> CompositeSubIdentifierReference: # noqa: D
assert (
self.name
), f"The element name should have been set during model transformation. Got {self}"
return CompositeSubIdentifierReference(name=self.name)
@dataclass
class Identifier(dbtClassMixin, Mergeable):
"""Describes a identifier"""
name: str
type: IdentifierType
description: str = ""
role: Optional[str] = None
entity: Optional[str] = None
identifiers: List[CompositeSubIdentifier] = field(default_factory=list)
expr: Optional[str] = None
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
# Moved validation down to entity level. No more default_entity_value
@property
def is_primary_time(self) -> bool: # noqa: D
return False
@property
def is_composite(self) -> bool: # noqa: D
return self.identifiers is not None and len(self.identifiers) > 0
@property
def reference(self) -> IdentifierReference: # noqa: D
return IdentifierReference(name=self.name)
@property
def is_linkable_identifier_type(self) -> bool:
"""Indicates whether or not this identifier can be used as a linkable identifier type for joins
That is, can you use the identifier as a linkable element in multi-hop dundered syntax. For example,
the country dimension in the listings data source can be linked via listing__country, because listing
is the primary key.
At the moment, you may only request things accessible via primary, unique, or natural keys, with natural
keys reserved for SCD Type II style data sources.
"""
return self.type in (IdentifierType.PRIMARY, IdentifierType.UNIQUE, IdentifierType.NATURAL)
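
Two of the properties above drive join planning downstream: an identifier is composite when sub-identifiers are present, and only primary, unique, or natural identifiers can anchor the dundered linkable syntax (e.g. listing__country). A toy version of those checks, assuming a trimmed-down identifier type enum rather than dbt's real classes:

from dataclasses import dataclass, field
from enum import Enum
from typing import List

class ToyIdentifierType(str, Enum):
    FOREIGN = "foreign"
    NATURAL = "natural"
    PRIMARY = "primary"
    UNIQUE = "unique"

@dataclass
class ToyIdentifier:
    name: str
    type: ToyIdentifierType
    sub_identifiers: List[str] = field(default_factory=list)

    @property
    def is_composite(self) -> bool:
        return len(self.sub_identifiers) > 0

    @property
    def is_linkable(self) -> bool:
        # Foreign keys cannot anchor a multi-hop join path like listing__country.
        return self.type in (ToyIdentifierType.PRIMARY,
                             ToyIdentifierType.UNIQUE,
                             ToyIdentifierType.NATURAL)

# Usage:
listing = ToyIdentifier(name="listing", type=ToyIdentifierType.PRIMARY)
guest = ToyIdentifier(name="guest", type=ToyIdentifierType.FOREIGN)
assert listing.is_linkable and not guest.is_linkable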

View File

@@ -29,6 +29,7 @@ from dbt.contracts.graph.nodes import (
GenericTestNode,
Exposure,
Metric,
Entity,
Group,
UnpatchedSourceDefinition,
ManifestNode,
@@ -36,6 +37,7 @@ from dbt.contracts.graph.nodes import (
ResultNode,
BaseNode,
)
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.contracts.graph.unparsed import SourcePatch
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.util import BaseArtifactMetadata, SourceKey, ArtifactMixin, schema_version
@@ -213,6 +215,39 @@ class MetricLookup(dbtClassMixin):
return manifest.metrics[unique_id]
class EntityLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
self.populate(manifest)
def get_unique_id(self, search_name, package: Optional[PackageName]):
return find_unique_id_for_package(self.storage, search_name, package)
def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
unique_id = self.get_unique_id(search_name, package)
if unique_id is not None:
return self.perform_lookup(unique_id, manifest)
return None
def add_entity(self, entity: Entity):
if entity.search_name not in self.storage:
self.storage[entity.search_name] = {}
self.storage[entity.search_name][entity.package_name] = entity.unique_id
def populate(self, manifest):
for entity in manifest.entities.values():
if hasattr(entity, "name"):
self.add_entity(entity)
def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> Entity:
if unique_id not in manifest.entities:
raise dbt.exceptions.DbtInternalError(
f"Entity {unique_id} found in cache but not found in manifest"
)
return manifest.entities[unique_id]
# This handles both models/seeds/snapshots and sources/metrics/exposures
class DisabledLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
@@ -457,6 +492,9 @@ class Disabled(Generic[D]):
MaybeMetricNode = Optional[Union[Metric, Disabled[Metric]]]
MaybeEntityNode = Optional[Union[Entity, Disabled[Entity]]]
MaybeDocumentation = Optional[Documentation]
@@ -600,6 +638,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs: MutableMapping[str, Documentation] = field(default_factory=dict)
exposures: MutableMapping[str, Exposure] = field(default_factory=dict)
metrics: MutableMapping[str, Metric] = field(default_factory=dict)
entities: MutableMapping[str, Entity] = field(default_factory=dict)
groups: MutableMapping[str, Group] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
@@ -622,6 +661,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_entity_lookup: Optional[EntityLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_disabled_lookup: Optional[DisabledLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
@@ -654,6 +696,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def update_metric(self, new_metric: Metric):
_update_into(self.metrics, new_metric)
def update_entity(self, new_entity: Entity):
_update_into(self.entities, new_entity)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
@@ -670,6 +715,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
"exposures": {k: v.to_dict(omit_none=False) for k, v in self.exposures.items()},
"groups": {k: v.to_dict(omit_none=False) for k, v in self.groups.items()},
"metrics": {k: v.to_dict(omit_none=False) for k, v in self.metrics.items()},
"entities": {k: v.to_dict(omit_none=False) for k, v in self.entities.items()},
"nodes": {k: v.to_dict(omit_none=False) for k, v in self.nodes.items()},
"sources": {k: v.to_dict(omit_none=False) for k, v in self.sources.items()},
}
@@ -732,6 +778,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.nodes.values(),
self.sources.values(),
self.metrics.values(),
self.entities.values(),
)
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
@@ -760,6 +807,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
entities={k: _deepcopy(v) for k, v in self.entities.items()},
groups={k: _deepcopy(v) for k, v in self.groups.items()},
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
@@ -777,6 +825,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.sources.values(),
self.exposures.values(),
self.metrics.values(),
self.entities.values(),
)
)
forward_edges, backward_edges = build_node_edges(edge_members)
@@ -816,6 +865,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs=self.docs,
exposures=self.exposures,
metrics=self.metrics,
entities=self.entities,
groups=self.groups,
selectors=self.selectors,
metadata=self.metadata,
@@ -839,6 +889,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return self.exposures[unique_id]
elif unique_id in self.metrics:
return self.metrics[unique_id]
elif unique_id in self.entities:
return self.entities[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.DbtInternalError(
@@ -875,6 +927,12 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self._metric_lookup = MetricLookup(self)
return self._metric_lookup
@property
def entity_lookup(self) -> EntityLookup:
if self._entity_lookup is None:
self._entity_lookup = EntityLookup(self)
return self._entity_lookup
def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)
@@ -975,6 +1033,31 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return Disabled(disabled[0])
return None
def resolve_entity(
self,
target_entity_name: str,
target_entity_package: Optional[str],
current_project: str,
node_package: str,
) -> MaybeEntityNode:
entity: Optional[Entity] = None
disabled: Optional[List[Entity]] = None
candidates = _search_packages(current_project, node_package, target_entity_package)
for pkg in candidates:
entity = self.entity_lookup.find(target_entity_name, pkg, self)
if entity is not None and entity.config.enabled:
return entity
# it's possible that the node is disabled
if disabled is None:
disabled = self.disabled_lookup.find(f"{target_entity_name}", pkg)
if disabled:
return Disabled(disabled[0])
return None
# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
@@ -1087,6 +1170,11 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.metrics[metric.unique_id] = metric
source_file.metrics.append(metric.unique_id)
def add_entity(self, source_file: SchemaSourceFile, entity: Entity):
_check_duplicates(entity, self.entities)
self.entities[entity.unique_id] = entity
source_file.entities.append(entity.unique_id)
def add_group(self, source_file: SchemaSourceFile, group: Group):
_check_duplicates(group, self.groups)
self.groups[group.unique_id] = group
@@ -1107,6 +1195,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
source_file.add_test(node.unique_id, test_from)
if isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
if isinstance(node, Entity):
source_file.entities.append(node.unique_id)
if isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
else:
@@ -1134,6 +1224,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.docs,
self.exposures,
self.metrics,
self.entities,
self.groups,
self.selectors,
self.files,
@@ -1147,11 +1238,20 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self._source_lookup,
self._ref_lookup,
self._metric_lookup,
self._entity_lookup,
self._disabled_lookup,
self._analysis_lookup,
)
return self.__class__, args
@property
def user_configured_model(self):
user_configured_model = UserConfiguredModel(
entities=[entity for entity in self.entities.values()],
metrics=[metric for metric in self.metrics.values()],
)
return user_configured_model
class MacroManifest(MacroMethods):
def __init__(self, macros):
@@ -1188,6 +1288,9 @@ class WritableManifest(ArtifactMixin):
metrics: Mapping[UniqueID, Metric] = field(
metadata=dict(description=("The metrics defined in the dbt project and its dependencies"))
)
entities: Mapping[UniqueID, Entity] = field(
metadata=dict(description=("The entities defined in the dbt project and its dependencies"))
)
groups: Mapping[UniqueID, Group] = field(
metadata=dict(description=("The groups defined in the dbt project"))
)
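
EntityLookup mirrors the existing metric lookup: entity names map to a per-package dict of unique ids, and resolve_entity walks the candidate packages (current project, the node's package, then the rest) until one matches. A self-contained sketch of that storage and search, with the package-priority logic reduced to a simple ordered list rather than dbt's _search_packages helper:

from typing import Dict, List, Optional

class ToyEntityLookup:
    def __init__(self) -> None:
        # search_name -> {package_name -> unique_id}
        self.storage: Dict[str, Dict[str, str]] = {}

    def add(self, search_name: str, package_name: str, unique_id: str) -> None:
        self.storage.setdefault(search_name, {})[package_name] = unique_id

    def find(self, search_name: str, candidate_packages: List[str]) -> Optional[str]:
        # Try the highest-priority packages first; return the first hit.
        by_package = self.storage.get(search_name, {})
        for package in candidate_packages:
            if package in by_package:
                return by_package[package]
        return None

# Usage:
lookup = ToyEntityLookup()
lookup.add("orders", "jaffle_shop", "entity.jaffle_shop.orders")
assert lookup.find("orders", ["my_project", "jaffle_shop"]) == "entity.jaffle_shop.orders"
assert lookup.find("customers", ["my_project"]) is None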

View File

@@ -0,0 +1,60 @@
from __future__ import annotations
from dbt.contracts.util import Replaceable, Mergeable
from dbt.dataclass_schema import dbtClassMixin
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from dbt.semantic.aggregation_properties import AggregationType
from dbt.semantic.references import TimeDimensionReference, MeasureReference
@dataclass
class MeasureAggregationParameters(dbtClassMixin, Replaceable):
"""Describes parameters for aggregations"""
percentile: Optional[float] = None
use_discrete_percentile: bool = False
use_approximate_percentile: bool = False
@dataclass
class MeasureNonAdditiveDimensionParameters(dbtClassMixin, Replaceable):
"""Describes the params for specifying non-additive dimensions in a measure.
NOTE: Currently, only TimeDimensions are supported for this filter
"""
name: str
window_choice: AggregationType = AggregationType.MIN
window_groupings: List[str] = field(default_factory=list)
@dataclass
class Measure(dbtClassMixin, Mergeable):
"""Describes a measure"""
name: str
agg: AggregationType
description: str = ""
expr: Optional[str] = None
create_metric: Optional[bool] = None
agg_params: Optional[MeasureAggregationParameters] = None
non_additive_dimension: Optional[MeasureNonAdditiveDimensionParameters] = None
# Defines the time dimension to aggregate this measure by. If not specified, it means to use the primary time
# dimension in the data source.
agg_time_dimension: Optional[str] = None
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
@property
def checked_agg_time_dimension(self) -> TimeDimensionReference:
"""Returns the aggregation time dimension, throwing an exception if it's not set."""
assert self.agg_time_dimension, (
f"Aggregation time dimension for measure {self.name} is not set! This should either be set directly on "
f"the measure specification in the model, or else defaulted to the primary time dimension in the data "
f"source containing the measure."
)
return TimeDimensionReference(name=self.agg_time_dimension)
@property
def reference(self) -> MeasureReference: # noqa: D
return MeasureReference(name=self.name)

View File

@@ -1,4 +1,13 @@
from dbt.node_types import NodeType
from __future__ import annotations
import json
from dbt.contracts.util import Replaceable, Mergeable
from dbt.exceptions import ParsingError
from dbt.dataclass_schema import dbtClassMixin, StrEnum
from dataclasses import dataclass, field
from typing import Optional, List, Union
from dbt.semantic.references import MeasureReference
from dbt.semantic.time import TimeGranularity, string_to_time_granularity
from dbt.semantic.constraints import WhereClauseConstraint
class MetricReference(object):
@@ -10,84 +19,155 @@ class MetricReference(object):
return f"{self.metric_name}"
class ResolvedMetricReference(MetricReference):
"""
Simple proxy over a Metric which delegates property
lookups to the underlying node. Also adds helper functions
for working with metrics (ie. __str__ and templating functions)
class MetricType(StrEnum):
"""Currently supported metric types"""
MEASURE_PROXY = "measure_proxy"
RATIO = "ratio"
EXPR = "expr"
CUMULATIVE = "cumulative"
DERIVED = "derived"
@dataclass
class UnparsedMetricInputMeasure(dbtClassMixin, Replaceable):
"""Provides a pointer to a measure along with metric-specific processing directives
If an alias is set, this will be used as the string name reference for this measure after the aggregation
phase in the SQL plan.
"""
def __init__(self, node, manifest, Relation):
super().__init__(node.name, node.package_name)
self.node = node
self.manifest = manifest
self.Relation = Relation
name: str
constraint: Optional[str] = None
alias: Optional[str] = None
def __getattr__(self, key):
return getattr(self.node, key)
def __str__(self):
return f"{self.node.name}"
@dataclass
class MetricInputMeasure(dbtClassMixin, Replaceable):
"""Provides a pointer to a measure along with metric-specific processing directives
If an alias is set, this will be used as the string name reference for this measure after the aggregation
phase in the SQL plan.
"""
@classmethod
def parent_metrics(cls, metric_node, manifest):
yield metric_node
name: str
constraint: Optional[WhereClauseConstraint] = None
alias: Optional[str] = None
for parent_unique_id in metric_node.depends_on.nodes:
node = manifest.metrics.get(parent_unique_id)
if node and node.resource_type == NodeType.Metric:
yield from cls.parent_metrics(node, manifest)
# Removed _from_yaml_value due to how dbt reads in yml
@classmethod
def parent_metrics_names(cls, metric_node, manifest):
yield metric_node.name
@property
def measure_reference(self) -> MeasureReference:
"""Property accessor to get the MeasureReference associated with this metric input measure"""
return MeasureReference(name=self.name)
for parent_unique_id in metric_node.depends_on.nodes:
node = manifest.metrics.get(parent_unique_id)
if node and node.resource_type == NodeType.Metric:
yield from cls.parent_metrics_names(node, manifest)
@property
def post_aggregation_measure_reference(self) -> MeasureReference:
"""Property accessor to get the MeasureReference with the aliased name, if appropriate"""
return MeasureReference(name=self.alias or self.name)
@classmethod
def reverse_dag_parsing(cls, metric_node, manifest, metric_depth_count):
if metric_node.calculation_method == "derived":
yield {metric_node.name: metric_depth_count}
metric_depth_count = metric_depth_count + 1
def __hash__(self) -> int: # noqa: D
return hash(json.dumps(self.to_dict()))
for parent_unique_id in metric_node.depends_on.nodes:
node = manifest.metrics.get(parent_unique_id)
if (
node
and node.resource_type == NodeType.Metric
and node.calculation_method == "derived"
):
yield from cls.reverse_dag_parsing(node, manifest, metric_depth_count)
def full_metric_dependency(self):
to_return = list(set(self.parent_metrics_names(self.node, self.manifest)))
return to_return
@dataclass
class MetricTimeWindow(dbtClassMixin, Mergeable):
"""Describes the window of time the metric should be accumulated over, e.g., '1 day', '2 weeks', etc"""
def base_metric_dependency(self):
in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))
count: int
granularity: TimeGranularity
to_return = []
for metric in in_scope_metrics:
if metric.calculation_method != "derived" and metric.name not in to_return:
to_return.append(metric.name)
def to_string(self) -> str: # noqa: D
return f"{self.count} {self.granularity.value}"
return to_return
@staticmethod
def parse(window: str) -> MetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise
Output of the form: (<time unit count>, <time granularity>, <error message>) - error message is None if window is formatted properly
"""
parts = window.split(" ")
if len(parts) != 2:
raise ParsingError(
f"Invalid window ({window}) in cumulative metric. Should be of the form `<count> <granularity>`, e.g., `28 days`",
)
def derived_metric_dependency(self):
in_scope_metrics = list(self.parent_metrics(self.node, self.manifest))
granularity = parts[1]
# if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s')
if granularity.endswith("s"):
# months -> month
granularity = granularity[:-1]
if granularity not in [item.value for item in TimeGranularity]:
raise ParsingError(
f"Invalid time granularity {granularity} in cumulative metric window string: ({window})",
)
to_return = []
for metric in in_scope_metrics:
if metric.calculation_method == "derived" and metric.name not in to_return:
to_return.append(metric.name)
count = parts[0]
if not count.isdigit():
raise ParsingError(
f"Invalid count ({count}) in cumulative metric window string: ({window})"
)
return to_return
return MetricTimeWindow(
count=int(count),
granularity=string_to_time_granularity(granularity),
)
def derived_metric_dependency_depth(self):
metric_depth_count = 1
to_return = list(self.reverse_dag_parsing(self.node, self.manifest, metric_depth_count))
return to_return
@dataclass
class UnparsedMetricInput(dbtClassMixin, Mergeable):
"""Provides a pointer to a metric along with the additional properties used on that metric."""
name: str
constraint: Optional[str] = None
alias: Optional[str] = None
offset_window: Optional[MetricTimeWindow] = None
offset_to_grain: Optional[TimeGranularity] = None
@dataclass
class MetricInput(dbtClassMixin, Mergeable):
"""Provides a pointer to a metric along with the additional properties used on that metric."""
name: str
constraint: Optional[WhereClauseConstraint] = None
alias: Optional[str] = None
offset_window: Optional[MetricTimeWindow] = None
offset_to_grain: Optional[TimeGranularity] = None
@dataclass
class UnparsedMetricTypeParams(dbtClassMixin, Mergeable):
"""Type params add additional context to certain metric types (the context depends on the metric type)"""
# NOTE: Adding a union to allow for the class or a string. We
# change to preferred class in schemas.py during conversion to Metric
measure: Optional[Union[UnparsedMetricInputMeasure, str]] = None
measures: List[Union[UnparsedMetricInputMeasure, str]] = field(default_factory=list)
numerator: Optional[Union[UnparsedMetricInputMeasure, str]] = None
denominator: Optional[Union[UnparsedMetricInputMeasure, str]] = None
expr: Optional[str] = None
window: Optional[Union[MetricTimeWindow, str]] = None
grain_to_date: Optional[TimeGranularity] = None
metrics: List[Union[UnparsedMetricInput, str]] = field(default_factory=list)
@dataclass
class MetricTypeParams(dbtClassMixin):
"""Type params add additional context to certain metric types (the context depends on the metric type)"""
measure: Optional[MetricInputMeasure] = None
measures: List[MetricInputMeasure] = field(default_factory=list)
numerator: Optional[MetricInputMeasure] = None
denominator: Optional[MetricInputMeasure] = None
expr: Optional[str] = None
window: Optional[MetricTimeWindow] = None
grain_to_date: Optional[TimeGranularity] = None
metrics: List[MetricInput] = field(default_factory=list)
@property
def numerator_measure_reference(self) -> Optional[MeasureReference]:
"""Return the measure reference, if any, associated with the metric input measure defined as the numerator"""
return self.numerator.measure_reference if self.numerator else None
@property
def denominator_measure_reference(self) -> Optional[MeasureReference]:
"""Return the measure reference, if any, associated with the metric input measure defined as the denominator"""
return self.denominator.measure_reference if self.denominator else None
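
MetricTimeWindow.parse above turns strings such as "28 days" into a count plus a granularity, stripping a trailing "s" and rejecting anything that is not exactly two tokens. A standalone sketch of that parsing, returning a plain tuple and raising ValueError in place of dbt's ParsingError and TimeGranularity types; the granularity whitelist here is only a plausible example:

from typing import Tuple

VALID_GRANULARITIES = ("day", "week", "month", "quarter", "year")

def parse_window(window: str) -> Tuple[int, str]:
    # Expect exactly "<count> <granularity>", e.g. "28 days".
    parts = window.split(" ")
    if len(parts) != 2:
        raise ValueError(f"Invalid window ({window}); expected `<count> <granularity>`, e.g. `28 days`")
    count, granularity = parts
    if granularity.endswith("s"):     # "days" -> "day"
        granularity = granularity[:-1]
    if granularity not in VALID_GRANULARITIES:
        raise ValueError(f"Invalid time granularity {granularity} in window string ({window})")
    if not count.isdigit():
        raise ValueError(f"Invalid count ({count}) in window string ({window})")
    return int(count), granularity

# Usage:
assert parse_window("28 days") == (28, "day")
assert parse_window("1 month") == (1, "month")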

View File

@@ -369,6 +369,11 @@ class MetricConfig(BaseConfig):
group: Optional[str] = None
@dataclass
class EntityConfig(BaseConfig):
enabled: bool = True
@dataclass
class ExposureConfig(BaseConfig):
enabled: bool = True
@@ -610,6 +615,7 @@ class SnapshotConfig(EmptySnapshotConfig):
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
NodeType.Metric: MetricConfig,
NodeType.Entity: EntityConfig,
NodeType.Exposure: ExposureConfig,
NodeType.Source: SourceConfig,
NodeType.Seed: SeedConfig,

View File

@@ -13,6 +13,14 @@ from typing import (
Iterator,
)
from dbt.semantic.references import (
MeasureReference,
LinkableElementReference,
EntityReference,
)
from dbt.semantic.object_utils import hash_items
from dbt.semantic.constraints import WhereClauseConstraint
from dbt.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
from dbt.clients.system import write_file
@@ -31,8 +39,16 @@ from dbt.contracts.graph.unparsed import (
Owner,
ExposureType,
MaturityType,
MetricFilter,
MetricTime,
)
from dbt.contracts.graph.identifiers import Identifier
from dbt.contracts.graph.dimensions import Dimension
from dbt.contracts.graph.measures import Measure
from dbt.contracts.graph.entities import EntityOrigin
from dbt.contracts.graph.metrics import (
MetricType,
MetricInputMeasure,
MetricTypeParams,
MetricInput,
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.events.proto_types import NodeInfo
@@ -57,6 +73,7 @@ from .model_config import (
TestConfig,
SourceConfig,
MetricConfig,
EntityConfig,
ExposureConfig,
EmptySnapshotConfig,
SnapshotConfig,
@@ -278,7 +295,7 @@ class ParsedNode(NodeInfoMixin, ParsedNodeMandatory, SerializableType):
@classmethod
def _deserialize(cls, dct: Dict[str, int]):
# The serialized ParsedNodes do not differ from each other
# in fields that would allow 'from_dict' to distinguis
# in fields that would allow 'from_dict' to distinguish
# between them.
resource_type = dct["resource_type"]
if resource_type == "model":
@@ -418,6 +435,7 @@ class CompiledNode(ParsedNode):
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
metrics: List[List[str]] = field(default_factory=list)
entities: List[List[str]] = field(default_factory=list)
depends_on: DependsOn = field(default_factory=DependsOn)
compiled_path: Optional[str] = None
compiled: bool = False
@@ -971,6 +989,7 @@ class Exposure(GraphNode):
refs: List[List[str]] = field(default_factory=list)
sources: List[List[str]] = field(default_factory=list)
metrics: List[List[str]] = field(default_factory=list)
entities: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
@property
@@ -1033,27 +1052,15 @@ class Exposure(GraphNode):
# ====================================
@dataclass
class MetricReference(dbtClassMixin, Replaceable):
sql: Optional[Union[str, int]]
unique_id: Optional[str]
@dataclass
class Metric(GraphNode):
name: str
description: str
label: str
calculation_method: str
expression: str
filters: List[MetricFilter]
time_grains: List[str]
dimensions: List[str]
type: MetricType
type_params: MetricTypeParams
entity: Optional[str] = None
constraint: Optional[WhereClauseConstraint] = None
resource_type: NodeType = field(metadata={"restrict": [NodeType.Metric]})
timestamp: Optional[str] = None
window: Optional[MetricTime] = None
model: Optional[str] = None
model_unique_id: Optional[str] = None
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: MetricConfig = field(default_factory=MetricConfig)
@@ -1062,8 +1069,43 @@ class Metric(GraphNode):
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
metrics: List[List[str]] = field(default_factory=list)
entities: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
@property
def input_measures(self) -> List[MetricInputMeasure]:
"""Return the complete list of input measure configurations for this metric"""
tp = self.type_params
res = tp.measures or []
if tp.measure:
res.append(tp.measure)
if tp.numerator:
res.append(tp.numerator)
if tp.denominator:
res.append(tp.denominator)
return res
@property
def measure_references(self) -> List[MeasureReference]:
"""Return the measure references associated with all input measure configurations for this metric"""
return [x.measure_reference for x in self.input_measures]
@property
def input_metrics(self) -> List[MetricInput]:
"""Return the associated input metrics for this metric"""
return self.type_params.metrics or []
@property
def definition_hash(self) -> str: # noqa: D
values: List[str] = [self.name, self.type_params.expr or ""]
if self.constraint:
values.append(self.constraint.where_clause)
if self.constraint.linkable_names:
values.extend(self.constraint.linkable_names)
values.extend([m.name for m in self.measure_references])
return hash_items(values)
@property
def depends_on_nodes(self):
return self.depends_on.nodes
@@ -1072,35 +1114,20 @@ class Metric(GraphNode):
def search_name(self):
return self.name
def same_model(self, old: "Metric") -> bool:
return self.model == old.model
def same_window(self, old: "Metric") -> bool:
return self.window == old.window
def same_dimensions(self, old: "Metric") -> bool:
return self.dimensions == old.dimensions
def same_filters(self, old: "Metric") -> bool:
return self.filters == old.filters
def same_entity(self, old: "Metric") -> bool:
return self.entity == old.entity
def same_description(self, old: "Metric") -> bool:
return self.description == old.description
def same_label(self, old: "Metric") -> bool:
return self.label == old.label
def same_type(self, old: "Metric") -> bool:
return self.type == old.type
def same_calculation_method(self, old: "Metric") -> bool:
return self.calculation_method == old.calculation_method
def same_type_params(self, old: "Metric") -> bool:
return self.type_params == old.type_params
def same_expression(self, old: "Metric") -> bool:
return self.expression == old.expression
def same_timestamp(self, old: "Metric") -> bool:
return self.timestamp == old.timestamp
def same_time_grains(self, old: "Metric") -> bool:
return self.time_grains == old.time_grains
def same_constraint(self, old: "Metric") -> bool:
return self.constraint == old.constraint
def same_config(self, old: "Metric") -> bool:
return self.config.same_contents(
@@ -1115,21 +1142,164 @@ class Metric(GraphNode):
return True
return (
self.same_model(old)
and self.same_window(old)
and self.same_dimensions(old)
and self.same_filters(old)
and self.same_description(old)
and self.same_label(old)
and self.same_calculation_method(old)
and self.same_expression(old)
and self.same_timestamp(old)
and self.same_time_grains(old)
self.same_description(old)
and self.same_entity(old)
and self.same_constraint(old)
and self.same_type(old)
and self.same_type_params(old)
and self.same_config(old)
and True
)
@dataclass
class Entity(GraphNode):
name: str
model: str
description: str
origin: EntityOrigin
sql_table: Optional[str] = None
identifiers: Sequence[Identifier] = field(default_factory=list)
dimensions: Sequence[Dimension] = field(default_factory=list)
measures: Sequence[Measure] = field(default_factory=list)
resource_type: NodeType = field(metadata={"restrict": [NodeType.Entity]})
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: EntityConfig = field(default_factory=EntityConfig)
unrendered_config: Dict[str, Any] = field(default_factory=dict)
sources: List[List[str]] = field(default_factory=list)
depends_on: DependsOn = field(default_factory=DependsOn)
refs: List[List[str]] = field(default_factory=list)
entities: List[List[str]] = field(default_factory=list)
metrics: List[List[str]] = field(default_factory=list)
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
return self.depends_on.nodes
@property
def search_name(self):
return self.name
def same_model(self, old: "Entity") -> bool:
return self.model == old.model
def same_identifiers(self, old: "Entity") -> bool:
return self.identifiers == old.identifiers
def same_dimensions(self, old: "Entity") -> bool:
return self.dimensions == old.dimensions
def same_measures(self, old: "Entity") -> bool:
return self.measures == old.measures
def same_description(self, old: "Entity") -> bool:
return self.description == old.description
def same_origin(self, old: "Entity") -> bool:
return self.origin == old.origin
def same_config(self, old: "Entity") -> bool:
return self.config.same_contents(
self.unrendered_config,
old.unrendered_config,
)
def same_contents(self, old: Optional["Entity"]) -> bool:
# existing when it didn't before is a change!
# metadata/tags changes are not "changes"
if old is None:
return True
return (
self.same_model(old)
and self.same_identifiers(old)
and self.same_dimensions(old)
and self.same_measures(old)
and self.same_description(old)
and self.same_origin(old)
and self.same_config(old)
and True
)
@property
def identifier_references(self) -> List[LinkableElementReference]: # noqa: D
return [i.reference for i in self.identifiers]
@property
def dimension_references(self) -> List[LinkableElementReference]: # noqa: D
return [i.reference for i in self.dimensions]
@property
def measure_references(self) -> List[MeasureReference]: # noqa: D
return [i.reference for i in self.measures]
def get_measure(self, measure_reference: MeasureReference) -> Measure: # noqa: D
for measure in self.measures:
if measure.reference == measure_reference:
return measure
raise ValueError(
f"No dimension with name ({measure_reference.name}) in data source with name ({self.name})"
)
def get_dimension(self, dimension_reference: LinkableElementReference) -> Dimension: # noqa: D
for dim in self.dimensions:
if dim.reference == dimension_reference:
return dim
raise ValueError(
f"No dimension with name ({dimension_reference}) in data source with name ({self.name})"
)
def get_identifier(
self, identifier_reference: LinkableElementReference
) -> Identifier: # noqa: D
for ident in self.identifiers:
if ident.reference == identifier_reference:
return ident
raise ValueError(
f"No identifier with name ({identifier_reference}) in data source with name ({self.name})"
)
@property
def has_validity_dimensions(self) -> bool:
"""Returns True if there are validity params set on one or more dimensions"""
return any([dim.validity_params is not None for dim in self.dimensions])
@property
def validity_start_dimension(self) -> Optional[Dimension]:
"""Returns the validity window start dimension, if one is set"""
validity_start_dims = [
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_start
]
if not validity_start_dims:
return None
assert (
len(validity_start_dims) == 1
), "Found more than one validity start dimension. This should have been blocked in validation!"
return validity_start_dims[0]
@property
def validity_end_dimension(self) -> Optional[Dimension]:
"""Returns the validity window end dimension, if one is set"""
validity_end_dims = [
dim for dim in self.dimensions if dim.validity_params and dim.validity_params.is_end
]
if not validity_end_dims:
return None
assert (
len(validity_end_dims) == 1
), "Found more than one validity end dimension. This should have been blocked in validation!"
return validity_end_dims[0]
@property
def reference(self) -> EntityReference: # noqa: D
return EntityReference(entity_name=self.name)
# ====================================
# Group node
# ====================================
@@ -1204,6 +1374,7 @@ GraphMemberNode = Union[
ResultNode,
Exposure,
Metric,
Entity,
]
# All "nodes" (or node-like objects) in this file

View File

@@ -1,3 +1,4 @@
from __future__ import annotations
import re
from dbt import deprecations
@@ -6,7 +7,6 @@ from dbt.contracts.util import (
AdditionalPropertiesMixin,
Mergeable,
Replaceable,
rename_metric_attr,
)
# trigger the PathEncoder
@@ -15,6 +15,17 @@ from dbt.exceptions import CompilationError, ParsingError
from dbt.dataclass_schema import dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, ValidationError
# Semantic Classes
from dbt.contracts.graph.dimensions import Dimension
from dbt.contracts.graph.identifiers import Identifier
from dbt.contracts.graph.measures import Measure
from dbt.contracts.graph.metrics import (
MetricType,
UnparsedMetricTypeParams,
)
from dbt.semantic.constraints import WhereClauseConstraint
from dbt.contracts.graph.entities import EntityMutability, EntityMutabilityType, EntityOrigin
from dataclasses import dataclass, field
from datetime import timedelta
from pathlib import Path
@@ -458,87 +469,107 @@ class UnparsedExposure(dbtClassMixin, Replaceable):
raise ValidationError("Exposure owner must have at least one of 'name' or 'email'.")
@dataclass
class MetricFilter(dbtClassMixin, Replaceable):
field: str
operator: str
# TODO : Can we make this Any?
value: str
class MetricTimePeriod(StrEnum):
day = "day"
week = "week"
month = "month"
year = "year"
def plural(self) -> str:
return str(self) + "s"
#########################
# SEMANTIC LAYER CLASSES
#########################
@dataclass
class MetricTime(dbtClassMixin, Mergeable):
count: Optional[int] = None
period: Optional[MetricTimePeriod] = None
class UnparsedEntity(dbtClassMixin, Replaceable):
"""This class is used for entity information"""
def __bool__(self):
return self.count is not None and self.period is not None
@dataclass
class UnparsedMetric(dbtClassMixin, Replaceable):
name: str
label: str
calculation_method: str
expression: str
model: str
description: str = ""
timestamp: Optional[str] = None
time_grains: List[str] = field(default_factory=list)
dimensions: List[str] = field(default_factory=list)
window: Optional[MetricTime] = None
model: Optional[str] = None
filters: List[MetricFilter] = field(default_factory=list)
identifiers: Sequence[Identifier] = field(default_factory=list)
dimensions: Sequence[Dimension] = field(default_factory=list)
measures: Sequence[Measure] = field(default_factory=list)
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
# TODO: Figure out if we need this
mutability: EntityMutability = EntityMutability(type=EntityMutabilityType.FULL_MUTATION)
# TODO: Figure out if we need this
origin: EntityOrigin = EntityOrigin.SOURCE
@classmethod
def validate(cls, data):
super(UnparsedEntity, cls).validate(data)
# TODO: Replace this hacky way to verify a ref statement
# We are using this today in order to verify that model field
# is taking a ref input
if "ref('" not in data["model"]:
raise ParsingError(
f"The entity '{data['name']}' does not contain a proper ref('') in the model property."
)
for identifier in data["identifiers"]:
if identifier.get("entity") is None:
if "name" not in identifier:
raise ParsingError(
f"Failed to define identifier entity value for entity '{data['name']}' because identifier name was not defined."
)
identifier["entity"] = identifier["name"]
@dataclass
class UnparsedMetric(dbtClassMixin):
"""Describes a metric"""
name: str
type: MetricType
type_params: UnparsedMetricTypeParams
description: str = ""
entity: Optional[str] = None
constraint: Optional[str] = None
meta: Dict[str, Any] = field(default_factory=dict)
tags: List[str] = field(default_factory=list)
config: Dict[str, Any] = field(default_factory=dict)
@classmethod
def validate(cls, data):
data = rename_metric_attr(data, raise_deprecation_warning=True)
super(UnparsedMetric, cls).validate(data)
if "name" in data:
errors = []
if " " in data["name"]:
errors.append("cannot contain spaces")
# This handles failing queries due to too long metric names.
# It only occurs in BigQuery and Snowflake (Postgres/Redshift truncate)
if len(data["name"]) > 250:
errors.append("cannot contain more than 250 characters")
if not (re.match(r"^[A-Za-z]", data["name"])):
errors.append("must begin with a letter")
if not (re.match(r"[\w-]+$", data["name"])):
errors.append("must contain only letters, numbers and underscores")
if errors:
raise ParsingError(
f"The metric name '{data['name']}' is invalid. It {', '.join(e for e in errors)}"
# The following validation is because CM couldn't figure out a better way
# to parse constraint strings into WhereClauseConstraints without throwing
# errors all over the place
if "constraint" in data:
if isinstance(data["constraint"], str):
data["constraint"] = WhereClauseConstraint.parse(data["constraint"])
else:
raise CompilationError(
f"Expected input for constraint on metric {data['name']} to be of type string"
)
if data.get("timestamp") is None and data.get("time_grains") is not None:
raise ValidationError(
f"The metric '{data['name']} has time_grains defined but is missing a timestamp dimension."
)
if "type_params" in data:
if "metrics" in data["type_params"]:
for loop_id, metric in enumerate(data["type_params"]["metrics"]):
if isinstance(metric, dict):
if isinstance(metric["constraint"], str):
data["type_params"]["metrics"][loop_id][
"constraint"
] = WhereClauseConstraint.parse(metric["constraint"])
if data.get("timestamp") is None and data.get("window") is not None:
raise ValidationError(
f"The metric '{data['name']} has a window defined but is missing a timestamp dimension."
)
if data.get("model") is None and data.get("calculation_method") != "derived":
raise ValidationError("Non-derived metrics require a 'model' property")
if data.get("model") is not None and data.get("calculation_method") == "derived":
raise ValidationError("Derived metrics cannot have a 'model' property")
# TODO: Figure out a better way to convert to input measures. We need this here
# so we can do full "mf model" validation in schemas.py. Specifically for input
# measure metric rules - they require that identifiers be present in the metrics property
if "entity" not in data:
if data["type"] != MetricType.DERIVED:
raise CompilationError(
f"The metric {data['name']} is missing the required entity property."
)
elif "entity" in data:
if data["type"] == MetricType.DERIVED:
raise CompilationError(
f"The metric {data['name']} is derived, which does not support entity definition."
)
# TODO: Replace this hacky way to verify an entity lookup
# We are doing this to ensure that the entity property is using an entity
# function and not just providing a string
if "entity('" not in data["entity"]:
raise ParsingError(
f"The metric '{data['name']}' does not contain a proper entity('') reference in the entity property."
)
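Taken together, the rules above mean a non-derived metric must point at exactly one entity via an entity('') reference, while a derived metric may not set entity at all. A sketch of payloads on either side of that rule (the type names and type_params fields other than "metrics" are assumptions, not confirmed by this diff):

measure_metric = {
    "name": "total_revenue",
    "type": "measure_proxy",            # assumed non-derived type name
    "entity": "entity('orders')",       # required, and must be an entity('') reference
    "type_params": {},
}

derived_metric = {
    "name": "revenue_per_order",
    "type": "derived",                  # derived metrics must NOT define entity
    "type_params": {"metrics": ["total_revenue", "order_count"]},
}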
@dataclass

View File

@@ -214,6 +214,7 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
sources: Dict[str, Any] = field(default_factory=dict)
tests: Dict[str, Any] = field(default_factory=dict)
metrics: Dict[str, Any] = field(default_factory=dict)
entities: Dict[str, Any] = field(default_factory=dict)
exposures: Dict[str, Any] = field(default_factory=dict)
vars: Optional[Dict[str, Any]] = field(
default=None,

View File

@@ -1,8 +1,4 @@
from typing import (
Type,
ClassVar,
cast,
)
from typing import Type, ClassVar, cast, List
import re
from dataclasses import fields
from enum import Enum
@@ -129,6 +125,11 @@ class StrEnum(str, SerializableType, Enum):
def _deserialize(cls, value: str):
return cls(value)
@classmethod
def list_names(cls) -> List[str]:
"""List valid names within this enum class"""
return list(cls.__members__.keys())
class HyphenatedDbtClassMixin(dbtClassMixin):
# used by from_dict/to_dict

View File

@@ -218,6 +218,11 @@ class DbtValidationError(DbtRuntimeError):
MESSAGE = "Validation Error"
class DbtSemanticValidationError(DbtRuntimeError):
CODE = 10020
MESSAGE = "Semantic Validation Error"
class ParsingError(DbtRuntimeError):
CODE = 10015
MESSAGE = "Parsing Error"
@@ -872,6 +877,17 @@ class MetricArgsError(CompilationError):
return msg
class EntityArgsError(CompilationError):
def __init__(self, node, args):
self.node = node
self.args = args
super().__init__(msg=self.get_message())
def get_message(self) -> str:
msg = f"entity() takes at most two arguments ({len(self.args)} given)"
return msg
class RefBadContextError(CompilationError):
def __init__(self, node, args):
self.node = node
@@ -1252,6 +1268,7 @@ class EnvVarMissingError(ParsingError):
class TargetNotFoundError(CompilationError):
# NOTE: CM Might be what I'm looking for
def __init__(
self,
node,

View File

@@ -21,7 +21,7 @@ from .selector_spec import (
INTERSECTION_DELIMITER = ","
DEFAULT_INCLUDES: List[str] = ["fqn:*", "source:*", "exposure:*", "metric:*"]
DEFAULT_INCLUDES: List[str] = ["fqn:*", "source:*", "exposure:*", "metric:*", "entity:*"]
DEFAULT_EXCLUDES: List[str] = []

View File

@@ -9,6 +9,7 @@ from dbt.contracts.graph.nodes import (
SourceDefinition,
Exposure,
Metric,
Entity,
GraphMemberNode,
)
from dbt.contracts.graph.manifest import Manifest
@@ -51,8 +52,8 @@ class GraphQueue:
node = self.manifest.expect(node_id)
if node.resource_type != NodeType.Model:
return False
# must be a Model - tell mypy this won't be a Source or Exposure or Metric
assert not isinstance(node, (SourceDefinition, Exposure, Metric))
# must be a Model - tell mypy this won't be a Source or Exposure or Metric or Entity
assert not isinstance(node, (SourceDefinition, Exposure, Metric, Entity))
if node.is_ephemeral:
return False
return True

View File

@@ -163,6 +163,9 @@ class NodeSelector(MethodManager):
elif unique_id in self.manifest.metrics:
metric = self.manifest.metrics[unique_id]
return metric.config.enabled
elif unique_id in self.manifest.entities:
entity = self.manifest.entities[unique_id]
return entity.config.enabled
node = self.manifest.nodes[unique_id]
return not node.empty and node.config.enabled
@@ -182,6 +185,8 @@ class NodeSelector(MethodManager):
node = self.manifest.exposures[unique_id]
elif unique_id in self.manifest.metrics:
node = self.manifest.metrics[unique_id]
elif unique_id in self.manifest.entities:
node = self.manifest.entities[unique_id]
else:
raise DbtInternalError(f"Node {unique_id} not found in the manifest!")
return self.node_is_match(node)

View File

@@ -12,6 +12,7 @@ from dbt.contracts.graph.nodes import (
SingularTestNode,
Exposure,
Metric,
Entity,
GenericTestNode,
SourceDefinition,
ResultNode,
@@ -43,6 +44,7 @@ class MethodName(StrEnum):
State = "state"
Exposure = "exposure"
Metric = "metric"
Entity = "entity"
Result = "result"
SourceStatus = "source_status"
@@ -71,7 +73,7 @@ def is_selected_node(fqn: List[str], node_selector: str):
return True
SelectorTarget = Union[SourceDefinition, ManifestNode, Exposure, Metric]
SelectorTarget = Union[SourceDefinition, ManifestNode, Exposure, Metric, Entity]
class SelectorMethod(metaclass=abc.ABCMeta):
@@ -118,6 +120,14 @@ class SelectorMethod(metaclass=abc.ABCMeta):
continue
yield unique_id, metric
def entity_nodes(self, included_nodes: Set[UniqueId]) -> Iterator[Tuple[UniqueId, Entity]]:
for key, entity in self.manifest.entities.items():
unique_id = UniqueId(key)
if unique_id not in included_nodes:
continue
yield unique_id, entity
def all_nodes(
self, included_nodes: Set[UniqueId]
) -> Iterator[Tuple[UniqueId, SelectorTarget]]:
@@ -126,6 +136,7 @@ class SelectorMethod(metaclass=abc.ABCMeta):
self.source_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes),
self.entity_nodes(included_nodes),
)
def configurable_nodes(
@@ -136,11 +147,12 @@ class SelectorMethod(metaclass=abc.ABCMeta):
def non_source_nodes(
self,
included_nodes: Set[UniqueId],
) -> Iterator[Tuple[UniqueId, Union[Exposure, ManifestNode, Metric]]]:
) -> Iterator[Tuple[UniqueId, Union[Exposure, ManifestNode, Metric, Entity]]]:
yield from chain(
self.parsed_nodes(included_nodes),
self.exposure_nodes(included_nodes),
self.metric_nodes(included_nodes),
self.entity_nodes(included_nodes),
)
@abc.abstractmethod
@@ -270,6 +282,33 @@ class MetricSelectorMethod(SelectorMethod):
yield node
class EntitySelectorMethod(SelectorMethod):
"""TODO: Add a description of what this selector method is doing"""
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:
parts = selector.split(".")
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_name = parts[0]
elif len(parts) == 2:
target_package, target_name = parts
else:
msg = (
'Invalid entity selector value "{}". Entities must be of '
"the form ${{entity_name}} or "
"${{entity_package.entity_name}}"
).format(selector)
raise DbtRuntimeError(msg)
for node, real_node in self.entity_nodes(included_nodes):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_name not in (real_node.name, SELECTOR_GLOB):
continue
yield node
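A self-contained sketch of the selector parsing above: an entity: selector value is either a bare entity name or package_name.entity_name, and anything longer is rejected.

def split_entity_selector(selector: str):
    parts = selector.split(".")
    if len(parts) == 1:
        return ("*", parts[0])          # package glob, entity name
    if len(parts) == 2:
        return (parts[0], parts[1])
    raise ValueError(f'Invalid entity selector value "{selector}"')

assert split_entity_selector("orders") == ("*", "orders")
assert split_entity_selector("my_package.orders") == ("my_package", "orders")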
class PathSelectorMethod(SelectorMethod):
def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[UniqueId]:
"""Yields nodes from included that match the given path."""
@@ -530,6 +569,8 @@ class StateSelectorMethod(SelectorMethod):
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
elif node in manifest.entities:
previous_node = manifest.entities[node]
if checker(previous_node, real_node):
yield node
@@ -616,6 +657,7 @@ class MethodManager:
MethodName.State: StateSelectorMethod,
MethodName.Exposure: ExposureSelectorMethod,
MethodName.Metric: MetricSelectorMethod,
MethodName.Entity: EntitySelectorMethod,
MethodName.Result: ResultSelectorMethod,
MethodName.SourceStatus: SourceStatusSelectorMethod,
}

View File

@@ -32,6 +32,7 @@ class NodeType(StrEnum):
Macro = "macro"
Exposure = "exposure"
Metric = "metric"
Entity = "entity"
Group = "group"
@classmethod
@@ -67,11 +68,14 @@ class NodeType(StrEnum):
cls.Analysis,
cls.Exposure,
cls.Metric,
cls.Entity,
]
def pluralize(self) -> str:
if self is self.Analysis:
return "analyses"
if self is self.Entity:
return "entities"
return f"{self}s"

View File

@@ -11,6 +11,8 @@ from dbt.events.base_types import EventLevel
import json
import pprint
from dbt.contracts.graph.metrics import UnparsedMetricInput
import dbt.exceptions
import dbt.tracking
import dbt.utils
@@ -62,6 +64,7 @@ from dbt.contracts.graph.nodes import (
ColumnInfo,
Exposure,
Metric,
Entity,
SeedNode,
ManifestNode,
ResultNode,
@@ -83,6 +86,8 @@ from dbt.parser.snapshots import SnapshotParser
from dbt.parser.sources import SourcePatcher
from dbt.version import __version__
from dbt.semantic.validations.model_validator import ModelValidator
from dbt.dataclass_schema import StrEnum, dbtClassMixin
MANIFEST_FILE_NAME = "manifest.json"
@@ -354,7 +359,7 @@ class ManifestLoader:
project, project_parser_files[project.project_name], parser_types
)
# Now that we've loaded most of the nodes (except for schema tests, sources, metrics)
# Now that we've loaded most of the nodes (except for schema tests, sources, metrics, entities)
# load up the Lookup objects to resolve them by name, so the SourceFiles store
# the unique_id instead of the name. Sources are loaded from yaml files, so
# aren't in place yet
@@ -373,6 +378,23 @@ class ManifestLoader:
self.process_nodes()
# Validate semantic model
# TODO: Figure out how to have this be its own area
semantic_result = ModelValidator().validate_model(self.manifest.user_configured_model)
if semantic_result.issues.has_blocking_issues:
error_message = "\n".join(
issue.as_cli_formatted_str() for issue in semantic_result.issues.errors
)
warning_message = "\n".join(
issue.as_cli_formatted_str() for issue in semantic_result.issues.warnings
)
future_error_message = "\n".join(
issue.as_cli_formatted_str() for issue in semantic_result.issues.future_errors
)
raise dbt.exceptions.DbtSemanticValidationError(
error_message + warning_message + future_error_message
)
self._perf_info.parse_project_elapsed = time.perf_counter() - start_parse_projects
# patch_sources converts the UnparsedSourceDefinitions in the
@@ -390,13 +412,14 @@ class ManifestLoader:
# copy the selectors from the root_project to the manifest
self.manifest.selectors = self.root_project.manifest_selectors
# update the refs, sources, docs and metrics depends_on.nodes
# update the refs, sources, docs, entities and metrics depends_on.nodes
# These check the created_at time on the nodes to
# determine whether they need processing.
start_process = time.perf_counter()
self.process_sources(self.root_project.project_name)
self.process_refs(self.root_project.project_name)
self.process_docs(self.root_project)
self.process_entities(self.root_project)
self.process_metrics(self.root_project)
# update tracking data
@@ -863,19 +886,51 @@ class ManifestLoader:
if exposure.created_at < self.started_at:
continue
_process_refs_for_exposure(self.manifest, current_project, exposure)
for metric in self.manifest.metrics.values():
if metric.created_at < self.started_at:
for entity in self.manifest.entities.values():
if entity.created_at < self.started_at:
continue
_process_refs_for_metric(self.manifest, current_project, metric)
_process_refs_for_entity(self.manifest, current_project, entity)
# Metrics can only be based on entities now
# for metric in self.manifest.metrics.values():
# if metric.created_at < self.started_at:
# continue
# _process_refs_for_metric(self.manifest, current_project, metric)
# Takes references in 'metrics' array of nodes and exposures, finds the target
# TODO: Get rid of this? Entities can't reference other entities, only identifiers
# Takes references in 'entities' array of nodes and exposures, finds the target
# node, and updates 'depends_on.nodes' with the unique id
def process_metrics(self, config: RuntimeConfig):
def process_entities(self, config: RuntimeConfig):
current_project = config.project_name
for node in self.manifest.nodes.values():
if node.created_at < self.started_at:
continue
_process_metrics_for_node(self.manifest, current_project, node)
_process_entities_for_node(self.manifest, current_project, node)
for entity in self.manifest.entities.values():
if entity.created_at < self.started_at:
continue
_process_entities_for_node(self.manifest, current_project, entity)
for metric in self.manifest.metrics.values():
if metric.created_at < self.started_at:
continue
_process_entities_for_node(self.manifest, current_project, metric)
for exposure in self.manifest.exposures.values():
if exposure.created_at < self.started_at:
continue
_process_entities_for_node(self.manifest, current_project, exposure)
# Takes references in 'metrics' array of metrics and exposures, finds the target
# node, and updates 'depends_on.nodes' with the unique id
def process_metrics(self, config: RuntimeConfig):
current_project = config.project_name
# NOTE: Commenting this out as metrics can now only be built on entities
# for node in self.manifest.nodes.values():
# if node.created_at < self.started_at:
# continue
# _process_metrics_for_node(self.manifest, current_project, node)
for entity in self.manifest.entities.values():
if entity.created_at < self.started_at:
continue
_process_metrics_for_node(self.manifest, current_project, entity)
for metric in self.manifest.metrics.values():
# TODO: Can we do this if the metric is derived & depends on
# some other metric for its definition? Maybe....
@@ -942,6 +997,16 @@ class ManifestLoader:
config.project_name,
)
_process_docs_for_metrics(ctx, metric)
for entity in self.manifest.entities.values():
if entity.created_at < self.started_at:
continue
ctx = generate_runtime_docs_context(
config,
entity,
self.manifest,
config.project_name,
)
_process_docs_for_entities(ctx, entity)
# Loops through all nodes and exposures, for each element in
# 'sources' array finds the source node and updates the
@@ -1155,6 +1220,10 @@ def _process_docs_for_exposure(context: Dict[str, Any], exposure: Exposure) -> N
exposure.description = get_rendered(exposure.description, context)
def _process_docs_for_entities(context: Dict[str, Any], entity: Entity) -> None:
entity.description = get_rendered(entity.description, context)
def _process_docs_for_metrics(context: Dict[str, Any], metric: Metric) -> None:
metric.description = get_rendered(metric.description, context)
@@ -1203,9 +1272,53 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur
manifest.update_exposure(exposure)
def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: Metric):
"""Given a manifest and a metric in that manifest, process its refs"""
for ref in metric.refs:
# NOTE: commenting out as metrics are based on entities now
# def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: Metric):
# """Given a manifest and a metric in that manifest, process its refs"""
# for ref in metric.refs:
# target_model: Optional[Union[Disabled, ManifestNode]] = None
# target_model_name: str
# target_model_package: Optional[str] = None
# if len(ref) == 1:
# target_model_name = ref[0]
# elif len(ref) == 2:
# target_model_package, target_model_name = ref
# else:
# raise dbt.exceptions.DbtInternalError(
# f"Refs should always be 1 or 2 arguments - got {len(ref)}"
# )
# target_model = manifest.resolve_ref(
# target_model_name,
# target_model_package,
# current_project,
# metric.package_name,
# )
# if target_model is None or isinstance(target_model, Disabled):
# # This may raise. Even if it doesn't, we don't want to add
# # this metric to the graph b/c there is no destination metric
# metric.config.enabled = False
# invalid_target_fail_unless_test(
# node=metric,
# target_name=target_model_name,
# target_kind="node",
# target_package=target_model_package,
# disabled=(isinstance(target_model, Disabled)),
# should_warn_if_disabled=False,
# )
# continue
# target_model_id = target_model.unique_id
# metric.depends_on.nodes.append(target_model_id)
# manifest.update_metric(metric)
def _process_refs_for_entity(manifest: Manifest, current_project: str, entity: Entity):
"""Given a manifest and an entity in that manifest, process its refs"""
for ref in entity.refs:
target_model: Optional[Union[Disabled, ManifestNode]] = None
target_model_name: str
target_model_package: Optional[str] = None
@@ -1223,33 +1336,32 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M
target_model_name,
target_model_package,
current_project,
metric.package_name,
entity.package_name,
)
if target_model is None or isinstance(target_model, Disabled):
# This may raise. Even if it doesn't, we don't want to add
# this metric to the graph b/c there is no destination metric
metric.config.enabled = False
# this entity to the graph b/c there is no destination entity
entity.config.enabled = False
invalid_target_fail_unless_test(
node=metric,
node=entity,
target_name=target_model_name,
target_kind="node",
target_package=target_model_package,
disabled=(isinstance(target_model, Disabled)),
should_warn_if_disabled=False,
)
continue
target_model_id = target_model.unique_id
metric.depends_on.nodes.append(target_model_id)
manifest.update_metric(metric)
entity.depends_on.nodes.append(target_model_id)
manifest.update_entity(entity)
def _process_metrics_for_node(
manifest: Manifest,
current_project: str,
node: Union[ManifestNode, Metric, Exposure],
node: Union[ManifestNode, Metric, Exposure, Entity],
):
"""Given a manifest and a node in that manifest, process its metrics"""
@@ -1262,7 +1374,10 @@ def _process_metrics_for_node(
target_metric_package: Optional[str] = None
if len(metric) == 1:
target_metric_name = metric[0]
if isinstance(metric[0], UnparsedMetricInput):
target_metric_name = metric[0].name
else:
target_metric_name = metric[0]
elif len(metric) == 2:
target_metric_package, target_metric_name = metric
else:
@@ -1295,6 +1410,55 @@ def _process_metrics_for_node(
node.depends_on.nodes.append(target_metric_id)
def _process_entities_for_node(
manifest: Manifest,
current_project: str,
node: Union[ManifestNode, Entity, Exposure, Metric],
):
"""Given a manifest and a node in that manifest, process its entities"""
if isinstance(node, SeedNode):
return
for entity in node.entities:
target_entity: Optional[Union[Disabled, Entity]] = None
target_entity_name: str
target_entity_package: Optional[str] = None
if len(entity) == 1:
target_entity_name = entity[0]
elif len(entity) == 2:
target_entity_package, target_entity_name = entity
else:
raise dbt.exceptions.DbtInternalError(
f"Entity references should always be 1 or 2 arguments - got {len(entity)}"
)
target_entity = manifest.resolve_entity(
target_entity_name,
target_entity_package,
current_project,
node.package_name,
)
if target_entity is None or isinstance(target_entity, Disabled):
# This may raise. Even if it doesn't, we don't want to add
# this node to the graph b/c there is no destination node
node.config.enabled = False
invalid_target_fail_unless_test(
node=node,
target_name=target_entity_name,
target_kind="entity",
target_package=target_entity_package,
disabled=(isinstance(target_entity, Disabled)),
)
continue
target_entity_id = target_entity.unique_id
node.depends_on.nodes.append(target_entity_id)
def _process_refs_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
"""Given a manifest and a node in that manifest, process its refs"""
@@ -1369,27 +1533,28 @@ def _process_sources_for_exposure(manifest: Manifest, current_project: str, expo
manifest.update_exposure(exposure)
def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
target_source: Optional[Union[Disabled, SourceDefinition]] = None
for source_name, table_name in metric.sources:
target_source = manifest.resolve_source(
source_name,
table_name,
current_project,
metric.package_name,
)
if target_source is None or isinstance(target_source, Disabled):
metric.config.enabled = False
invalid_target_fail_unless_test(
node=metric,
target_name=f"{source_name}.{table_name}",
target_kind="source",
disabled=(isinstance(target_source, Disabled)),
)
continue
target_source_id = target_source.unique_id
metric.depends_on.nodes.append(target_source_id)
manifest.update_metric(metric)
# TODO: Remove this code because metrics can't be based on sources
# def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
# target_source: Optional[Union[Disabled, SourceDefinition]] = None
# for source_name, table_name in metric.sources:
# target_source = manifest.resolve_source(
# source_name,
# table_name,
# current_project,
# metric.package_name,
# )
# if target_source is None or isinstance(target_source, Disabled):
# metric.config.enabled = False
# invalid_target_fail_unless_test(
# node=metric,
# target_name=f"{source_name}.{table_name}",
# target_kind="source",
# disabled=(isinstance(target_source, Disabled)),
# )
# continue
# target_source_id = target_source.unique_id
# metric.depends_on.nodes.append(target_source_id)
# manifest.update_metric(metric)
def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):

View File

@@ -242,7 +242,7 @@ class PartialParsing:
self.remove_source_override_target(source)
def delete_disabled(self, unique_id, file_id):
# This node/metric/exposure is disabled. Find it and remove it from disabled dictionary.
# This node/metric/entity/exposure is disabled. Find it and remove it from disabled dictionary.
for dis_index, dis_node in enumerate(self.saved_manifest.disabled[unique_id]):
if dis_node.file_id == file_id:
node = dis_node
@@ -441,6 +441,18 @@ class PartialParsing:
if metric_element:
self.delete_schema_metric(schema_file, metric_element)
self.merge_patch(schema_file, "metrics", metric_element)
elif unique_id in self.saved_manifest.entities:
entity = self.saved_manifest.entities[unique_id]
file_id = entity.file_id
if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
schema_file = self.saved_files[file_id]
entities = []
if "entities" in schema_file.dict_from_yaml:
entities = schema_file.dict_from_yaml["entities"]
entity_element = self.get_schema_element(entities, entity.name)
if entity_element:
self.delete_schema_entity(schema_file, entity_element)
self.merge_patch(schema_file, "entities", entity_element)
elif unique_id in self.saved_manifest.macros:
macro = self.saved_manifest.macros[unique_id]
file_id = macro.file_id
@@ -746,6 +758,29 @@ class PartialParsing:
self.delete_schema_metric(schema_file, elem)
self.merge_patch(schema_file, dict_key, elem)
# entities
dict_key = "entities"
entity_diff = self.get_diff_for("entities", saved_yaml_dict, new_yaml_dict)
if entity_diff["changed"]:
for entity in entity_diff["changed"]:
self.delete_schema_entity(schema_file, entity)
self.merge_patch(schema_file, dict_key, entity)
if entity_diff["deleted"]:
for entity in entity_diff["deleted"]:
self.delete_schema_entity(schema_file, entity)
if entity_diff["added"]:
for entity in entity_diff["added"]:
self.merge_patch(schema_file, dict_key, entity)
# Handle schema file updates due to env_var changes
if dict_key in env_var_changes and dict_key in new_yaml_dict:
for name in env_var_changes[dict_key]:
if name in entity_diff["changed_or_deleted_names"]:
continue
elem = self.get_schema_element(new_yaml_dict[dict_key], name)
if elem:
self.delete_schema_entity(schema_file, elem)
self.merge_patch(schema_file, dict_key, elem)
# groups
dict_key = "groups"
group_diff = self.get_diff_for("groups", saved_yaml_dict, new_yaml_dict)
@@ -958,6 +993,24 @@ class PartialParsing:
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
# entities are created only from schema files, but also can be referred to by other nodes
def delete_schema_entity(self, schema_file, entity_dict):
entity_name = entity_dict["name"]
entities = schema_file.entities.copy()
for unique_id in entities:
if unique_id in self.saved_manifest.entities:
entity = self.saved_manifest.entities[unique_id]
if entity.name == entity_name:
# Need to find everything that referenced this entity and schedule for parsing
if unique_id in self.saved_manifest.child_map:
self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
self.deleted_manifest.entities[unique_id] = self.saved_manifest.entities.pop(
unique_id
)
schema_file.entities.remove(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
def get_schema_element(self, elem_list, elem_name):
for element in elem_list:
if "name" in element and element["name"] == elem_name:

View File

@@ -21,11 +21,12 @@ from dbt.context.configured import generate_schema_yml_context, SchemaYamlVars
from dbt.context.providers import (
generate_parse_exposure,
generate_parse_metrics,
generate_parse_entities,
generate_test_context,
)
from dbt.context.macro_resolver import MacroResolver
from dbt.contracts.files import FileHash, SchemaSourceFile
from dbt.contracts.graph.model_config import MetricConfig, ExposureConfig
from dbt.contracts.graph.model_config import MetricConfig, ExposureConfig, EntityConfig
from dbt.contracts.graph.nodes import (
ParsedNodePatch,
ColumnInfo,
@@ -34,6 +35,7 @@ from dbt.contracts.graph.nodes import (
UnpatchedSourceDefinition,
Exposure,
Metric,
Entity,
Group,
ManifestNode,
GraphMemberNode,
@@ -49,9 +51,11 @@ from dbt.contracts.graph.unparsed import (
UnparsedNodeUpdate,
UnparsedExposure,
UnparsedMetric,
UnparsedEntity,
UnparsedSourceDefinition,
UnparsedGroup,
)
from dbt.contracts.graph.measures import Measure
from dbt.exceptions import (
CompilationError,
DuplicateMacroPatchNameError,
@@ -84,6 +88,27 @@ from dbt.parser.generic_test_builders import (
Testable,
)
from dbt.utils import get_pseudo_test_path, coerce_dict_str, md5
from dbt.semantic.transformations.entity_transformations.boolean_measure_aggregation import (
BooleanMeasureAggregation,
)
from dbt.semantic.transformations.entity_transformations.composite_identifier_expressions import (
CompositeIdentifierExpressionRule,
)
from dbt.semantic.transformations.entity_transformations.convert_count import ConvertCountToSum
from dbt.semantic.transformations.entity_transformations.convert_median import (
ConvertMedianToPercentile,
)
from dbt.semantic.transformations.entity_transformations.lowercase_names import LowerCaseNames
from dbt.semantic.transformations.entity_transformations.measure_aggregation_time_dimension import (
SetMeasureAggregationTimeDimension,
)
from dbt.semantic.transformations.entity_transformations.proxy_measure import ProxyMeasure
from dbt.semantic.transformations.metric_transformations.add_input_metric_measures import (
AddInputMetricMeasures,
)
from dbt.semantic.transformations.metric_transformations.convert_type_params import (
ConvertTypeParams,
)
TestDef = Union[str, Dict[str, Any]]
@@ -97,6 +122,7 @@ schema_file_keys = (
"analyses",
"exposures",
"metrics",
"entities",
)
@@ -117,6 +143,7 @@ class ParserRef:
def __init__(self):
self.column_info: Dict[str, ColumnInfo] = {}
# TODO: Mimic this for dimension information at the entity level
def add(
self,
column: Union[HasDocs, UnparsedColumn],
@@ -517,7 +544,6 @@ class SchemaParser(SimpleParser[GenericTestBlock, GenericTestNode]):
yaml_block = YamlBlock.from_file_block(block, dct)
parser: YamlDocsReader
# There are 7 kinds of parsers:
# Model, Seed, Snapshot, Source, Macro, Analysis, Exposures
@@ -564,10 +590,16 @@ class SchemaParser(SimpleParser[GenericTestBlock, GenericTestNode]):
exp_parser = ExposureParser(self, yaml_block)
exp_parser.parse()
# parse entities
if "entities" in dct:
entity_parser = EntityParser(self, yaml_block)
entity_parser.parse()
# parse metrics
if "metrics" in dct:
metric_parser = MetricParser(self, yaml_block)
metric_parser.parse()
metric_parser.transform()
# parse groups
if "groups" in dct:
@@ -1129,15 +1161,17 @@ class ExposureParser(YamlReader):
config=config,
unrendered_config=unrendered_config,
)
ctx = generate_parse_exposure(
parsed,
self.root_project,
self.schema_parser.manifest,
package_name,
)
depends_on_jinja = "\n".join("{{ " + line + "}}" for line in unparsed.depends_on)
get_rendered(depends_on_jinja, ctx, parsed, capture_macros=True)
# parsed now has a populated refs/sources/metrics
# parsed now has a populated refs/sources/metrics/entities
if parsed.config.enabled:
self.manifest.add_exposure(self.yaml.file, parsed)
@@ -1178,6 +1212,149 @@ class ExposureParser(YamlReader):
self.parse_exposure(unparsed)
class EntityParser(YamlReader):
def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):
super().__init__(schema_parser, yaml, NodeType.Entity.pluralize())
self.schema_parser = schema_parser
self.yaml = yaml
def parse_entity(self, unparsed: UnparsedEntity):
package_name = self.project.project_name
unique_id = f"{NodeType.Entity}.{package_name}.{unparsed.name}"
entity_model_name = unparsed.model.replace('"', "'").split("'")[1]
model_key = f"model.{package_name}.{entity_model_name}"
path = self.yaml.path.relative_path
fqn = self.schema_parser.get_fqn_prefix(path)
fqn.append(unparsed.name)
config = self._generate_entity_config(
target=unparsed,
fqn=fqn,
package_name=package_name,
rendered=True,
)
config = config.finalize_and_validate()
unrendered_config = self._generate_entity_config(
target=unparsed,
fqn=fqn,
package_name=package_name,
rendered=False,
)
if not isinstance(config, EntityConfig):
raise DbtInternalError(
f"Calculated a {type(config)} for an entity, but expected a EntityConfig"
)
parsed = Entity(
resource_type=NodeType.Entity,
package_name=package_name,
path=path,
original_file_path=self.yaml.path.original_file_path,
unique_id=unique_id,
fqn=fqn,
model=unparsed.model,
sql_table=self.schema_parser.manifest.nodes[model_key].relation_name,
name=unparsed.name,
description=unparsed.description,
identifiers=unparsed.identifiers,
dimensions=unparsed.dimensions,
measures=unparsed.measures,
origin=unparsed.origin,
meta=unparsed.meta,
tags=unparsed.tags,
config=config,
unrendered_config=unrendered_config,
)
parsed = ConvertCountToSum._transform_entity(entity=parsed)
parsed = LowerCaseNames._transform_entity(entity=parsed)
parsed = CompositeIdentifierExpressionRule._transform_entity(entity=parsed)
parsed = ConvertMedianToPercentile._transform_entity(entity=parsed)
parsed = BooleanMeasureAggregation._transform_entity(entity=parsed)
parsed = SetMeasureAggregationTimeDimension._transform_entity(entity=parsed)
ctx = generate_parse_entities(
entity=parsed,
config=self.root_project,
manifest=self.schema_parser.manifest,
package_name=package_name,
)
self = ProxyMeasure._create_proxy_metrics(self, parsed_entity=parsed, path=path, fqn=fqn)
if parsed.model is not None:
model_ref = "{{ " + parsed.model + " }}"
get_rendered(model_ref, ctx, parsed)
# if the entity is disabled we do not want it included in the manifest, only in the disabled dict
if parsed.config.enabled:
# self.manifest.add_metric(self.yaml.file, parsed)
self.manifest.add_entity(self.yaml.file, parsed)
else:
self.manifest.add_disabled(self.yaml.file, parsed)
def _generate_entity_config(
self, target: UnparsedEntity, fqn: List[str], package_name: str, rendered: bool
):
generator: BaseContextConfigGenerator
if rendered:
generator = ContextConfigGenerator(self.root_project)
else:
generator = UnrenderedConfigGenerator(self.root_project)
# configs with precedence set
precedence_configs = dict()
# first apply entity configs
precedence_configs.update(target.config)
return generator.calculate_node_config(
config_call_dict={},
fqn=fqn,
resource_type=NodeType.Entity,
project_name=package_name,
base=False,
patch_config_dict=precedence_configs,
)
def _generate_proxy_metric_config(
self, target: Measure, fqn: List[str], package_name: str, rendered: bool
):
generator: BaseContextConfigGenerator
if rendered:
generator = ContextConfigGenerator(self.root_project)
else:
generator = UnrenderedConfigGenerator(self.root_project)
# configs with precedence set
precedence_configs = dict()
# first apply metric configs
precedence_configs.update(target.config)
return generator.calculate_node_config(
config_call_dict={},
fqn=fqn,
resource_type=NodeType.Metric,
project_name=package_name,
base=False,
patch_config_dict=precedence_configs,
)
def parse(self):
for data in self.get_key_dicts():
try:
UnparsedEntity.validate(data)
unparsed = UnparsedEntity.from_dict(data)
except (ValidationError, JSONValidationError) as exc:
raise YamlParseDictError(self.yaml.path, self.key, data, exc)
self.parse_entity(unparsed)
class MetricParser(YamlReader):
def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):
super().__init__(schema_parser, yaml, NodeType.Metric.pluralize())
@@ -1208,11 +1385,16 @@ class MetricParser(YamlReader):
rendered=False,
)
parsed_metric_type_params = ConvertTypeParams._get_metric_type_params(unparsed.type_params)
if not isinstance(config, MetricConfig):
raise DbtInternalError(
f"Calculated a {type(config)} for a metric, but expected a MetricConfig"
)
# Derived metrics are now defined via type params, so the metrics list needed
# for exposure dependency resolution is rebuilt from those type params below
parsed = Metric(
resource_type=NodeType.Metric,
package_name=package_name,
@@ -1220,17 +1402,13 @@ class MetricParser(YamlReader):
original_file_path=self.yaml.path.original_file_path,
unique_id=unique_id,
fqn=fqn,
model=unparsed.model,
name=unparsed.name,
entity=unparsed.entity,
description=unparsed.description,
label=unparsed.label,
calculation_method=unparsed.calculation_method,
expression=str(unparsed.expression),
timestamp=unparsed.timestamp,
dimensions=unparsed.dimensions,
window=unparsed.window,
time_grains=unparsed.time_grains,
filters=unparsed.filters,
type=unparsed.type,
type_params=parsed_metric_type_params,
constraint=unparsed.constraint,
metrics=[[metric.name] for metric in parsed_metric_type_params.metrics],
meta=unparsed.meta,
tags=unparsed.tags,
config=config,
@@ -1244,15 +1422,10 @@ class MetricParser(YamlReader):
package_name,
)
if parsed.model is not None:
model_ref = "{{ " + parsed.model + " }}"
get_rendered(model_ref, ctx, parsed)
parsed.expression = get_rendered(
parsed.expression,
ctx,
node=parsed,
)
if parsed.entity is not None:
entity_ref = "{{ " + parsed.entity + " }}"
# get_rendered is the step that adds the dependencies
get_rendered(entity_ref, ctx, parsed)
# if the metric is disabled we do not want it included in the manifest, only in the disabled dict
if parsed.config.enabled:
@@ -1293,6 +1466,15 @@ class MetricParser(YamlReader):
raise YamlParseDictError(self.yaml.path, self.key, data, exc)
self.parse_metric(unparsed)
def transform(self):
# We resolve input metric measures here because it requires
# all of the metrics to already be parsed. This exists for
# derived metrics
metrics = [metric for metric in self.manifest.metrics.values()]
for metric in metrics:
metric = AddInputMetricMeasures.add_input_metrics(metric=metric, metrics=metrics)
# self.update_metric
class GroupParser(YamlReader):
def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock):

View File

@@ -0,0 +1,80 @@
from dbt.dataclass_schema import StrEnum
class AggregationType(StrEnum):
"""Aggregation methods for measures"""
SUM = "sum"
MIN = "min"
MAX = "max"
COUNT = "count"
COUNT_DISTINCT = "count_distinct"
SUM_BOOLEAN = "sum_boolean"
AVERAGE = "average"
PERCENTILE = "percentile"
MEDIAN = "median"
@property
def is_additive(self) -> bool:
"""Indicates that if you sum values over a dimension grouping, you will still get an accurate result for this metric."""
if (
self is AggregationType.SUM
or self is AggregationType.SUM_BOOLEAN
or self is AggregationType.COUNT
):
return True
elif (
self is AggregationType.MIN
or self is AggregationType.MAX
or self is AggregationType.COUNT_DISTINCT
or self is AggregationType.AVERAGE
or self is AggregationType.PERCENTILE
or self is AggregationType.MEDIAN
):
return False
# else:
# assert_values_exhausted(self)
@property
def is_expansive(self) -> bool:
"""Expansive ≝ Op( X Y ...) = Op( Op(X) Op(Y) ...)
NOTE: COUNT is only expansive because it's transformed into a SUM agg during model transformation
"""
return self in (
AggregationType.SUM,
AggregationType.MIN,
AggregationType.MAX,
AggregationType.SUM_BOOLEAN,
AggregationType.COUNT,
)
@property
def fill_nulls_with_0(self) -> bool:
"""Indicates if charts should show 0 instead of null where there are gaps in data."""
return self in (
AggregationType.SUM,
AggregationType.COUNT_DISTINCT,
AggregationType.SUM_BOOLEAN,
AggregationType.COUNT,
)
@property
def can_limit_dimension_values(self) -> bool:
"""Indicates if we can limit dimension values in charts.
Currently, this means:
1. The dimensions we care about most are the ones with the highest numeric values
2. We can calculate the "other" column in the postprocessor (meaning the metric is expansive)
"""
return self in (AggregationType.SUM, AggregationType.SUM_BOOLEAN, AggregationType.COUNT)
class AggregationState(StrEnum):
"""Represents how the measure is aggregated."""
NON_AGGREGATED = "NON_AGGREGATED"
PARTIAL = "PARTIAL"
COMPLETE = "COMPLETE"
def __repr__(self) -> str: # noqa: D
return f"{self.__class__.__name__}.{self.name}"

View File

@@ -0,0 +1,142 @@
from __future__ import annotations
import re
from typing import List, Dict, Any
from dataclasses import dataclass, field
from dbt.contracts.util import Mergeable
from mo_sql_parsing import parse as mo_parse
from dbt.dataclass_schema import dbtClassMixin
from dbt.exceptions import DbtSemanticValidationError
from dbt.semantic.sql_bind_parameters import SqlBindParameters
LITERAL_STR = "literal"
INTERVAL_LITERAL = "interval"
@dataclass
class WhereClauseConstraint(dbtClassMixin, Mergeable):
"""Contains a string that is a where clause"""
where_clause: str = ""
linkable_names: List[str] = field(default_factory=list)
sql_params: SqlBindParameters = SqlBindParameters()
# def __init__( # noqa: D
# self,
# where_clause: str = "",
# linkable_names: Optional[List[str]] = None,
# sql_params: Optional[SqlBindParameters] = None,
# # sql params: user-originated sql params that need to be escaped in a dialect-specific way keys are the
# # name of the template value in the `where` string, values are the string to be escaped and
# # inserted into the where string (ie where = "%(1)s", sql_values = {"1": "cote d'ivoire"})
# ) -> WhereClauseConstraint:
# where_clause = where_clause.strip("\n") if where_clause else ""
# linkable_names = linkable_names or []
# if sql_params is None:
# sql_params = SqlBindParameters()
# super().__init__(
# where_clause=where_clause,
# linkable_names=linkable_names,
# sql_params=sql_params,
# )
@staticmethod
def parse(s: str) -> WhereClauseConstraint:
"""Parse a string into a WhereClauseConstraint
We assume that a string we need to parse carries no bind parameters: if it had
bind parameters, the string would already have structure and would not need parsing.
"""
s = strip_where_clause(s)
where_clause_str = f"WHERE {s}"
# to piggyback on mo_sql_parsing we need a SELECT statement
# mo_sql_parsing breaks the sql statement into clauses:
# where_clause_str = "WHERE is_instant" yields -> {'select': {'value': '_'}, 'from': '_', 'where': 'is_instant'}
# where_clause_str = "WHERE is_instant AND country = 'vanuatu' AND is_lux or ds < '2020-01-02'" yields ->
# {'select': {'value': '_'}, 'from': '_', 'where': {'or': [{'and': ['is_instant', {'eq': ['country', {'literal': 'vanuatu'}]}, 'is_lux']}, {'lt': ['ds', {'literal': '2020-01-02'}]}]}}
parsed = mo_parse(f"select _ from _ {where_clause_str}")
if "where" not in parsed:
raise DbtSemanticValidationError(parsed)
where_clause = parsed["where"]
if isinstance(where_clause, dict):
if not len(where_clause.keys()) == 1:
raise DbtSemanticValidationError(
f"expected parsed constraint to contain exactly one key; got {where_clause}"
)
return WhereClauseConstraint(
where_clause=s,
linkable_names=constraint_dimension_names_from_dict(where_clause),
sql_params=SqlBindParameters(),
)
elif isinstance(where_clause, str):
return WhereClauseConstraint(
where_clause=s,
linkable_names=[where_clause.strip()],
sql_params=SqlBindParameters(),
)
else:
raise TypeError(
f"where-clause is neither a dict nor a string. Unexpectedly it is a {type(where_clause)}"
)
def __repr__(self) -> str: # noqa: D
return (
f"{self.__class__.__name__}"
f"(where_clause={self.where_clause}, linkable_names={self.linkable_names})"
)
def strip_where_clause(s: str) -> str:
"""Removes WHERE from the beginning of the string, if present (regardless of case)"""
# '^' tells the regex to only check the beginning of the string
return re.sub("^where ", "", s, flags=re.IGNORECASE)
def constraint_dimension_names_from_dict(where_clause: Dict[str, Any]) -> List[str]: # type: ignore[misc] # noqa: D
dims = []
for key, clause in where_clause.items():
if key == LITERAL_STR or key == INTERVAL_LITERAL:
continue
dims += _get_dimensions_from_clause(clause)
return dims
def constraint_values_from_dict(where_clause: Dict[str, Any]) -> List[str]: # type: ignore[misc] # noqa: d
values = []
for key, clause in where_clause.items():
if key == LITERAL_STR:
values.append(clause)
elif isinstance(clause, dict):
values += constraint_values_from_dict(clause)
elif isinstance(clause, list):
for item in clause:
if isinstance(item, dict):
values += constraint_values_from_dict(item)
return values
def _constraint_dimensions_from_list(list_clause: List[Any]) -> List[str]: # type: ignore[misc] # noqa: D
dims = []
for clause in list_clause:
dims += _get_dimensions_from_clause(clause)
return dims
def _get_dimensions_from_clause(clause: Any) -> List[str]: # type: ignore[misc] # noqa: D
if clause is not None:
if isinstance(clause, dict):
return constraint_dimension_names_from_dict(clause)
elif isinstance(clause, list):
return _constraint_dimensions_from_list(clause)
elif isinstance(clause, str):
return [clause.strip()]
return []
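A usage sketch for WhereClauseConstraint.parse, assuming an environment with this branch's dbt package and mo_sql_parsing installed (the input string is hypothetical; the import path matches the one used elsewhere in this diff):

from dbt.semantic.constraints import WhereClauseConstraint

constraint = WhereClauseConstraint.parse("where country = 'vanuatu' AND is_lux")
print(constraint.where_clause)    # country = 'vanuatu' AND is_lux  (leading WHERE stripped)
print(constraint.linkable_names)  # ['country', 'is_lux']           (literal values are skipped)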

View File

@@ -0,0 +1,278 @@
from __future__ import annotations
import pprint
import textwrap
import itertools
import random
import string
from collections import OrderedDict, defaultdict, deque
from collections.abc import Mapping
from dataclasses import is_dataclass, fields
import datetime
from hashlib import sha1
from typing import Sequence, TypeVar, Tuple, NoReturn, Union
from dbt.dataclass_schema import dbtClassMixin
def assert_values_exhausted(value: NoReturn) -> NoReturn:
"""Helper method to allow MyPy to guarantee an exhaustive switch through an enumeration or literal
DO NOT MODIFY THE TYPE SIGNATURE OF THIS FUNCTION UNLESS MYPY CHANGES HOW IT HANDLES THINGS
To use this function correctly you MUST do an exhaustive switch through ALL values, using `is` for comparison
(doing x == SomeEnum.VALUE will not work, nor will `x in (SomeEnum.VALUE_1, SomeEnum.VALUE_2)`).
If mypy raises an error of the form:
`x has incompatible type SomeEnum; expected NoReturn`
the switch is not constructed correctly. Fix your switch statement to use `is` for all comparisons.
If mypy raises an error of the form
`x has incompatible type Union[Literal...]` expected NoReturn`
the switch statement is non-exhaustive, and the values listed in the error message need to be accounted for.
See https://mypy.readthedocs.io/en/stable/literal_types.html#exhaustiveness-checks
For an enum example, see issue:
https://github.com/python/mypy/issues/6366#issuecomment-560369716
"""
assert False, f"Should be unreachable, but got {value}"
def assert_exactly_one_arg_set(**kwargs) -> None: # type: ignore
"""Throws an assertion error if 0 or more than 1 argument is not None."""
num_set = 0
for value in kwargs.values():
if value is not None:
num_set += 1
assert num_set == 1, f"{num_set} argument(s) set instead of 1 in arguments: {kwargs}"
def is_hashable_base_model(obj): # type:ignore # noqa: D
return isinstance(obj, dbtClassMixin)
def _to_pretty_printable_object(obj): # type: ignore
"""Convert the object that will look nicely when fed into the PrettyPrinter.
Main change is that dataclasses will have a field with the class name. In Python 3.10, the pretty printer class will
support dataclasses, so we can remove this once we're on 3.10. Also tried the prettyprint package with dataclasses,
but that prints full names for the classes e.g. a.b.MyClass and it also always added line breaks, even if an object
could fit on one line, so preferred to not use that.
e.g.
metricflow.specs.DimensionSpec(
name='country',
identifier_links=()
),
Instead, the below will print something like:
{'class': 'DimensionSpec',
'name': 'country_latest',
'identifier_links': ({'class': 'IdentifierSpec',
'name': 'listing',
'identifier_links': ()},)}
"""
if obj is None:
return None
elif isinstance(obj, (str, int, float)):
return obj
elif isinstance(obj, (list, tuple)):
result = []
for item in obj:
result.append(_to_pretty_printable_object(item))
if isinstance(obj, list):
return result
elif isinstance(obj, tuple):
return tuple(result)
assert False
elif isinstance(obj, Mapping):
result = {}
for key, value in obj.items():
result[_to_pretty_printable_object(key)] = _to_pretty_printable_object(value)
return result
elif is_dataclass(obj):
result = {"class": type(obj).__name__}
for field in fields(obj):
result[field.name] = _to_pretty_printable_object(getattr(obj, field.name))
return result
elif is_hashable_base_model(obj):
result = {"class": type(obj).__name__}
for field_name, value in obj.dict().items():
result[field_name] = _to_pretty_printable_object(value)
return result
# Can't make it more pretty.
return obj
def pretty_format(obj) -> str: # type: ignore
"""Return the object as a string that looks pretty."""
if isinstance(obj, str):
return obj
return pprint.pformat(_to_pretty_printable_object(obj), width=80, sort_dicts=False)
def pformat_big_objects(*args, **kwargs) -> str: # type: ignore
"""Prints a series of objects with many fields in a pretty way.
See _to_pretty_printable_object() for more context on this format. Looks like:
measure_recipe:
{'class': 'MeasureRecipe',
'measure_node': ReadSqlSourceNode(node_id=rss_140),
'required_local_linkable_specs': ({'class': 'DimensionSpec',
'name': 'is_instant',
'identifier_links': ()},),
'join_linkable_instances_recipes': ()}
"""
items = []
for arg in args:
items.append(pretty_format(arg))
for key, value in kwargs.items():
items.append(f"{key}:")
items.append(textwrap.indent(pretty_format(value), prefix=" "))
return "\n".join(items)
SequenceT = TypeVar("SequenceT")
def flatten_nested_sequence(
sequence_of_sequences: Sequence[Sequence[SequenceT]],
) -> Tuple[SequenceT, ...]:
"""Convert a nested sequence into a flattened tuple.
e.g. ((1,2), (3,4)) -> (1, 2, 3, 4)
"""
return tuple(itertools.chain.from_iterable(sequence_of_sequences))
def flatten_and_dedupe(
sequence_of_sequences: Sequence[Sequence[SequenceT]],
) -> Tuple[SequenceT, ...]:
"""Convert a nested sequence into a flattened tuple, with de-duping.
e.g. ((1,2), (2,3)) -> (1, 2, 3)
"""
items = flatten_nested_sequence(sequence_of_sequences)
return tuple(OrderedDict.fromkeys(items))
def random_id() -> str:
"""Generates an 8-digit random alphanumeric string."""
alphabet = string.ascii_lowercase + string.digits
# Characters that go below the line are visually unappealing, so don't use those.
filtered_alphabet = [x for x in alphabet if x not in "gjpqy"]
return "".join(random.choices(filtered_alphabet, k=8))
def hash_items(items: Sequence[SqlColumnType]) -> str:
"""Produces a hash from a list of strings."""
hash_builder = sha1()
for item in items:
hash_builder.update(str(item).encode("utf-8"))
return hash_builder.hexdigest()
SqlColumnType = Union[str, int, float, datetime.datetime, datetime.date, bool]
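A usage sketch for the helpers above, assuming this module is importable as dbt.semantic.object_utils (that path appears in another file in this diff):

from dbt.semantic.object_utils import flatten_and_dedupe, hash_items, random_id

print(flatten_and_dedupe(((1, 2), (2, 3))))   # (1, 2, 3)
print(hash_items(["orders", 7, True]))        # same inputs always hash to the same sha1 digest
print(random_id())                            # e.g. 'ak3n8rz0' - 8 chars, descender letters excluded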
class iter_bucket:
"""Wrap *iterable* and return an object that buckets it iterable into
child iterables based on a *key* function.
>>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
>>> s = iter_bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character
>>> sorted(list(s)) # Get the keys
['a', 'b', 'c']
>>> a_iterable = s['a']
>>> next(a_iterable)
'a1'
>>> next(a_iterable)
'a2'
>>> list(s['b'])
['b1', 'b2', 'b3']
The original iterable will be advanced and its items will be cached until
they are used by the child iterables. This may require significant storage.
By default, attempting to select a bucket to which no items belong will
exhaust the iterable and cache all values.
If you specify a *validator* function, selected buckets will instead be
checked against it.
>>> from itertools import count
>>> it = count(1, 2) # Infinite sequence of odd numbers
>>> key = lambda x: x % 10 # Bucket by last digit
>>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only
>>> s = iter_bucket(it, key=key, validator=validator)
>>> 2 in s
False
>>> list(s[2])
[]
"""
def __init__(self, iterable, key, validator=None):
self._it = iter(iterable)
self._key = key
self._cache = defaultdict(deque)
self._validator = validator or (lambda x: True)
def __contains__(self, value):
if not self._validator(value):
return False
try:
item = next(self[value])
except StopIteration:
return False
else:
self._cache[value].appendleft(item)
return True
def _get_values(self, value):
"""
Helper to yield items from the parent iterator that match *value*.
Items that don't match are stored in the local cache as they
are encountered.
"""
while True:
# If we've cached some items that match the target value, emit
# the first one and evict it from the cache.
if self._cache[value]:
yield self._cache[value].popleft()
# Otherwise we need to advance the parent iterator to search for
# a matching item, caching the rest.
else:
while True:
try:
item = next(self._it)
except StopIteration:
return
item_value = self._key(item)
if item_value == value:
yield item
break
elif self._validator(item_value):
self._cache[item_value].append(item)
def __iter__(self):
for item in self._it:
item_value = self._key(item)
if self._validator(item_value):
self._cache[item_value].append(item)
yield from self._cache.keys()
def __getitem__(self, value):
if not self._validator(value):
return iter(())
return self._get_values(value)

View File

@@ -0,0 +1,111 @@
from __future__ import annotations
from dataclasses import dataclass
from dbt.dataclass_schema import dbtClassMixin
@dataclass(frozen=True)
class ElementReference(dbtClassMixin):
"""Used when we need to refer to a dimension, measure, identifier, but other attributes are unknown."""
name: str
@dataclass(frozen=True)
class LinkableElementReference(ElementReference):
"""Used when we need to refer to a dimension or identifier, but other attributes are unknown."""
pass
@dataclass(frozen=True)
class MeasureReference(ElementReference):
"""Used when we need to refer to a measure (separate from LinkableElementReference because measures aren't linkable"""
pass
@dataclass(frozen=True)
class DimensionReference(LinkableElementReference): # noqa: D
pass
@property
def time_dimension_reference(self) -> TimeDimensionReference: # noqa: D
return TimeDimensionReference(name=self.name)
@dataclass(frozen=True)
class TimeDimensionReference(DimensionReference): # noqa: D
pass
def dimension_reference(self) -> DimensionReference: # noqa: D
return DimensionReference(name=self.name)
@dataclass(frozen=True)
class IdentifierReference(LinkableElementReference): # noqa: D
pass
@dataclass(frozen=True)
class CompositeSubIdentifierReference(ElementReference): # noqa: D
pass
@dataclass(frozen=True)
class MetricReference(ElementReference): # noqa: D
pass
class ModelReference(dbtClassMixin):
"""A reference to something in the model.
For example, a measure instance could have a defined_from field that has a model reference to the measure / data
source that it is supposed to reference. Added for exploratory purposes, so whether this is needed is TBD.
"""
pass
@dataclass(frozen=True)
class EntityReference(ModelReference):
"""A reference to a entity definition in the model."""
entity_name: str
def __hash__(self) -> int: # noqa: D
return hash(self.entity_name)
@dataclass(frozen=True)
class EntityElementReference(ModelReference):
"""A reference to an element definition in a data source definition in the model.
TODO: Fields should be *Reference objects.
"""
entity_name: str
name: str
@staticmethod
def create_from_references( # noqa: D
entity_reference: EntityReference, element_reference: ElementReference
) -> EntityElementReference:
return EntityElementReference(
entity_name=entity_reference.entity_name,
name=element_reference.name,
)
@property
def entity_reference(self) -> EntityReference: # noqa: D
return EntityReference(self.entity_name)
def is_from(self, ref: EntityReference) -> bool:
"""Returns true if this reference is from the same data source as the supplied reference."""
return self.entity_name == ref.entity_name
@dataclass(frozen=True)
class MetricModelReference(ModelReference):
"""A reference to a metric definition in the model."""
metric_name: str
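
For reference, a small usage sketch (not part of this diff) showing how these reference types compose; the dbt.semantic.references import path matches the one used by the validation modules later in this change:

from dbt.semantic.references import (
    EntityReference,
    DimensionReference,
    EntityElementReference,
)

entity_ref = EntityReference(entity_name="transactions")
dim_ref = DimensionReference(name="ds")

# Combine an entity reference with an element reference into a fully-qualified element reference.
element_ref = EntityElementReference.create_from_references(entity_ref, dim_ref)
assert element_ref.is_from(entity_ref)
assert element_ref.entity_reference == entity_ref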


@@ -0,0 +1,131 @@
from __future__ import annotations
import datetime
from collections import OrderedDict
from dataclasses import dataclass
from typing import Any, Optional, Tuple, Mapping
from dbt.dataclass_schema import dbtClassMixin
from dbt.semantic.object_utils import SqlColumnType, assert_exactly_one_arg_set
@dataclass(frozen=True)
class SqlBindParameterValue(dbtClassMixin):
"""SqlColumnType has issues with serialization, so using this union-style type."""
str_value: Optional[str] = None
int_value: Optional[int] = None
float_value: Optional[float] = None
datetime_value: Optional[datetime.datetime] = None
date_value: Optional[datetime.date] = None
bool_value: Optional[bool] = None
def __post_init__(self) -> None: # noqa: D
assert_exactly_one_arg_set(
str_value=self.str_value,
int_value=self.int_value,
float_value=self.float_value,
datetime_value=self.datetime_value,
date_value=self.date_value,
bool_value=self.bool_value,
)
@property
def union_value(self) -> SqlColumnType: # noqa: D
if self.str_value is not None:
return self.str_value
elif self.int_value is not None:
return self.int_value
elif self.float_value is not None:
return self.float_value
elif self.datetime_value is not None:
return self.datetime_value
elif self.date_value is not None:
return self.date_value
elif self.bool_value is not None:
return self.bool_value
raise RuntimeError("No values are set - this should have been prevented by the post init")
@staticmethod
def create_from_sql_column_type(value: SqlColumnType) -> SqlBindParameterValue:
"""Convenience method for creating these values. Frowning on the use of isinstance()."""
if isinstance(value, str):
return SqlBindParameterValue(str_value=value)
elif isinstance(value, int):
return SqlBindParameterValue(int_value=value)
elif isinstance(value, float):
return SqlBindParameterValue(float_value=value)
elif isinstance(value, datetime.datetime):
return SqlBindParameterValue(datetime_value=value)
elif isinstance(value, datetime.date):
return SqlBindParameterValue(date_value=value)
elif isinstance(value, bool):
return SqlBindParameterValue(bool_value=value)
raise RuntimeError(f"Unhandled type: {type(value)}")
@dataclass(frozen=True)
class SqlBindParameter(dbtClassMixin): # noqa: D
key: str
value: SqlBindParameterValue
@dataclass(frozen=True)
class SqlBindParameters(dbtClassMixin):
"""Helps to build execution parameters during SQL query rendering.
These can be used as per https://docs.sqlalchemy.org/en/14/core/tutorial.html#using-textual-sql
"""
# Using a tuple of items for immutability, since dicts are mutable.
param_items: Tuple[SqlBindParameter, ...] = ()
def combine(self, additional_params: SqlBindParameters) -> SqlBindParameters:
"""Create a new set of bind parameters that includes parameters from this and additional_params"""
if len(self.param_items) == 0:
return additional_params
if len(additional_params.param_items) == 0:
return self
self_dict = {item.key: item.value for item in self.param_items}
other_dict = {item.key: item.value for item in additional_params.param_items}
for key, value in other_dict.items():
if key in self_dict and self_dict[key] != value:
raise RuntimeError(
f"Conflict with key {key} in combining parameters. "
f"Existing params: {self_dict} Additional params: {other_dict}"
)
new_items = list(self.param_items)
included_keys = set(item.key for item in new_items)
for item in additional_params.param_items:
if item.key in included_keys:
continue
new_items.append(item)
included_keys.add(item.key)
return SqlBindParameters(param_items=tuple(new_items))
@property
def param_dict(self) -> OrderedDict[str, SqlColumnType]:
"""Useful for passing into SQLAlchemy / DB-API methods."""
param_dict: OrderedDict[str, SqlColumnType] = OrderedDict()
for item in self.param_items:
param_dict[item.key] = item.value.union_value
return param_dict
@staticmethod
def create_from_dict(param_dict: Mapping[str, SqlColumnType]) -> SqlBindParameters: # noqa: D
return SqlBindParameters(
tuple(
SqlBindParameter(
key=key, value=SqlBindParameterValue.create_from_sql_column_type(value)
)
for key, value in param_dict.items()
)
)
def __eq__(self, other: Any) -> bool: # type: ignore # noqa: D
return isinstance(other, SqlBindParameters) and self.param_dict == other.param_dict
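
A brief usage sketch of the bind parameter helpers above. The module path in the import is an assumption, since this hunk does not show the file name; adjust it to wherever SqlBindParameters actually lives:

from dbt.semantic.sql_bind_parameters import SqlBindParameters  # assumed module path

base = SqlBindParameters.create_from_dict({"start_date": "2023-01-01"})
extra = SqlBindParameters.create_from_dict({"row_limit": 100})

# combine() merges the two parameter sets and raises a RuntimeError if the same
# key is bound to two different values.
merged = base.combine(extra)
print(merged.param_dict)  # OrderedDict([('start_date', '2023-01-01'), ('row_limit', 100)])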

core/dbt/semantic/time.py

@@ -0,0 +1,219 @@
from __future__ import annotations
from typing import Any
from dbt.dataclass_schema import StrEnum
from dbt.semantic.object_utils import assert_values_exhausted
class TimeGranularity(StrEnum):
"""For time dimensions, the smallest possible difference between two time values.
Needed for calculating adjacency when merging 2 different time ranges.
"""
# Names are used in parameters to DATE_TRUNC, so don't change them.
# Values are used to convert user supplied strings to enums.
DAY = "day"
WEEK = "week"
MONTH = "month"
QUARTER = "quarter"
YEAR = "year"
def to_int(self) -> int:
"""Convert to an int so that the size of the granularity can be easily compared."""
if self is TimeGranularity.DAY:
return 10
elif self is TimeGranularity.WEEK:
return 11
elif self is TimeGranularity.MONTH:
return 12
elif self is TimeGranularity.QUARTER:
return 13
elif self is TimeGranularity.YEAR:
return 14
else:
assert_values_exhausted(self)
def is_smaller_than(self, other: "TimeGranularity") -> bool: # noqa: D
return self.to_int() < other.to_int()
def is_smaller_than_or_equal(self, other: "TimeGranularity") -> bool: # noqa: D
return self.to_int() <= other.to_int()
# @property
# def offset_period(self) -> pd.offsets.DateOffset:
# """Offset object to use for adjusting by one granularity period."""
# # The type checker is throwing errors for some of those arguments, but they are valid.
# if self is TimeGranularity.DAY:
# return pd.offsets.DateOffset(days=1) # type: ignore
# elif self is TimeGranularity.WEEK:
# return pd.offsets.DateOffset(weeks=1) # type: ignore
# elif self is TimeGranularity.MONTH:
# return pd.offsets.DateOffset(months=1)
# elif self is TimeGranularity.QUARTER:
# return pd.offsets.DateOffset(months=3)
# elif self is TimeGranularity.YEAR:
# return pd.offsets.DateOffset(years=1) # type: ignore
# else:
# assert_values_exhausted(self)
@property
def format_with_first_or_last(self) -> bool:
"""Indicates that this can only be calculated if query results display the first or last date of the period."""
return self in [TimeGranularity.MONTH, TimeGranularity.QUARTER, TimeGranularity.YEAR]
# def is_period_start(self, date: Union[pd.Timestamp, date]) -> bool: # noqa: D
# pd_date = pd.Timestamp(date)
# if self is TimeGranularity.DAY:
# return True
# elif self is TimeGranularity.WEEK:
# return ISOWeekDay.from_pandas_timestamp(pd_date).is_week_start
# elif self is TimeGranularity.MONTH:
# return pd_date.is_month_start
# elif self is TimeGranularity.QUARTER:
# return pd_date.is_quarter_start
# elif self is TimeGranularity.YEAR:
# return pd_date.is_year_start
# else:
# assert_values_exhausted(self)
# def is_period_end(self, date: Union[pd.Timestamp, date]) -> bool: # noqa: D
# pd_date = pd.Timestamp(date)
# if self is TimeGranularity.DAY:
# return True
# elif self is TimeGranularity.WEEK:
# return ISOWeekDay.from_pandas_timestamp(pd_date).is_week_end
# elif self is TimeGranularity.MONTH:
# return pd_date.is_month_end
# elif self is TimeGranularity.QUARTER:
# return pd_date.is_quarter_end
# elif self is TimeGranularity.YEAR:
# return pd_date.is_year_end
# else:
# assert_values_exhausted(self)
# @property
# def period_begin_offset( # noqa: D
# self,
# ) -> Union[
# pd.offsets.MonthBegin, pd.offsets.QuarterBegin, pd.offsets.Week, pd.offsets.YearBegin
# ]:
# if self is TimeGranularity.DAY:
# raise ValueError(f"Can't get period start offset for TimeGranularity.{self.name}.")
# elif self is TimeGranularity.WEEK:
# return pd.offsets.Week(weekday=ISOWeekDay.MONDAY.pandas_value)
# elif self is TimeGranularity.MONTH:
# return pd.offsets.MonthBegin()
# elif self is TimeGranularity.QUARTER:
# return pd.offsets.QuarterBegin(startingMonth=1)
# elif self is TimeGranularity.YEAR:
# return pd.offsets.YearBegin()
# else:
# assert_values_exhausted(self)
# @property
# def period_end_offset( # noqa: D
# self,
# ) -> Union[pd.offsets.MonthEnd, pd.offsets.QuarterEnd, pd.offsets.Week, pd.offsets.YearEnd]:
# if self is TimeGranularity.DAY:
# raise ValueError(f"Can't get period end offset for TimeGranularity.{self.name}.")
# elif self == TimeGranularity.WEEK:
# return pd.offsets.Week(weekday=ISOWeekDay.SUNDAY.pandas_value)
# elif self is TimeGranularity.MONTH:
# return pd.offsets.MonthEnd()
# elif self is TimeGranularity.QUARTER:
# return pd.offsets.QuarterEnd(startingMonth=3)
# elif self is TimeGranularity.YEAR:
# return pd.offsets.YearEnd()
# else:
# assert_values_exhausted(self)
# def adjust_to_start_of_period(
# self, date_to_adjust: pd.Timestamp, rollback: bool = True
# ) -> pd.Timestamp:
# """Adjust to start of period if not at start already."""
# if rollback:
# return self.period_begin_offset.rollback(date_to_adjust)
# else:
# return self.period_begin_offset.rollforward(date_to_adjust)
# def adjust_to_end_of_period(
# self, date_to_adjust: pd.Timestamp, rollforward: bool = True
# ) -> pd.Timestamp:
# """Adjust to end of period if not at end already."""
# if rollforward:
# return self.period_end_offset.rollforward(date_to_adjust)
# else:
# return self.period_end_offset.rollback(date_to_adjust)
# def match_start_or_end_of_period(
# self, date_to_match: pd.Timestamp, date_to_adjust: pd.Timestamp
# ) -> pd.Timestamp:
# """Adjust date_to_adjust to be start or end of period based on if date_to_match is at start or end of period."""
# if self.is_period_start(date_to_match):
# return self.adjust_to_start_of_period(date_to_adjust)
# elif self.is_period_end(date_to_match):
# return self.adjust_to_end_of_period(date_to_adjust)
# else:
# raise ValueError(
# f"Expected `date_to_match` to fall at the start or end of the granularity period. Got '{date_to_match}' for granularity {self}."
# )
def __lt__(self, other: Any) -> bool: # type: ignore [misc] # noqa: D
if not isinstance(other, TimeGranularity):
return NotImplemented
return self.to_int() < other.to_int()
def __hash__(self) -> int: # noqa: D
return self.to_int()
def __repr__(self) -> str: # noqa: D
return f"{self.__class__.__name__}.{self.name}"
class ISOWeekDay(StrEnum):
"""Day of week values per ISO standard"""
MONDAY = 1
TUESDAY = 2
WEDNESDAY = 3
THURSDAY = 4
FRIDAY = 5
SATURDAY = 6
SUNDAY = 7
# @staticmethod
# def from_pandas_timestamp(timestamp: pd.Timestamp) -> ISOWeekDay:
# """Factory for streamlining conversion from a Pandas Timestamp to an ISOWeekDay"""
# return ISOWeekDay(timestamp.isoweekday())
@property
def is_week_start(self) -> bool:
"""Return comparison of instance value against ISO standard start of week (Monday)"""
return self is ISOWeekDay.MONDAY
@property
def is_week_end(self) -> bool:
"""Return comparison of instance value against ISO standard end of week (Sunday)"""
return self is ISOWeekDay.SUNDAY
@property
def pandas_value(self) -> int:
"""Returns the pandas int value representation of the ISOWeekDay"""
return self.value - 1
def string_to_time_granularity(s: str) -> TimeGranularity: # noqa: D
values = {item.value: item for item in TimeGranularity}
return values[s]
SUPPORTED_GRANULARITIES = [
TimeGranularity.DAY,
TimeGranularity.WEEK,
TimeGranularity.MONTH,
TimeGranularity.QUARTER,
TimeGranularity.YEAR,
]
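
An illustrative sketch of the granularity helpers defined above, using the dbt.semantic.time import path referenced by the validation rules later in this change:

from dbt.semantic.time import TimeGranularity, string_to_time_granularity

day = string_to_time_granularity("day")
assert day is TimeGranularity.DAY
assert day.is_smaller_than(TimeGranularity.MONTH)       # 10 < 12 via to_int()
assert TimeGranularity.QUARTER < TimeGranularity.YEAR   # __lt__ also delegates to to_int()
assert TimeGranularity.MONTH.format_with_first_or_last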


@@ -0,0 +1,21 @@
from abc import ABC
from dbt.contracts.graph.nodes import Entity
from dbt.semantic.aggregation_properties import AggregationType
class BooleanMeasureAggregation(ABC):
"""Converts the expression used in boolean measures so that it can be aggregated."""
@staticmethod
def _transform_entity(entity: Entity) -> Entity: # noqa: D
if entity.measures:
for measure in entity.measures:
if measure.agg == AggregationType.SUM_BOOLEAN:
if measure.expr:
measure.expr = f"case when {measure.expr} then 1 else 0 end"
else:
measure.expr = f"case when {measure.name} then 1 else 0 end"
measure.agg = AggregationType.SUM
return entity
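
For illustration, a minimal sketch of the rewrite this transform performs, using a hypothetical stand-in dataclass rather than the real Entity/Measure contracts:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeMeasure:  # hypothetical stand-in for a measure with agg == SUM_BOOLEAN
    name: str
    agg: str = "sum_boolean"
    expr: Optional[str] = None

measure = FakeMeasure(name="is_bulk_transaction")
# The transform wraps the boolean column (or expression) in a CASE statement
# and downgrades the aggregation to a plain SUM:
measure.expr = f"case when {measure.expr or measure.name} then 1 else 0 end"
measure.agg = "sum"
print(measure.expr)  # case when is_bulk_transaction then 1 else 0 end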


@@ -0,0 +1,28 @@
from dbt.contracts.graph.nodes import Entity
from abc import ABC
class CompositeIdentifierExpressionRule(ABC):
"""Transform composite sub-identifiers for convenience.
If a sub-identifier has no expression, check if an identifier exists
with the same name and use that identifier's expression if it has one.
"""
@staticmethod
def _transform_entity(entity: Entity) -> Entity: # noqa: D
for identifier in entity.identifiers:
if identifier.identifiers is None or len(identifier.identifiers) == 0:
continue
for sub_identifier in identifier.identifiers:
if sub_identifier.name or sub_identifier.expr:
continue
for other_identifier in entity.identifiers:
if sub_identifier.ref == other_identifier.name:
sub_identifier.ref = None
sub_identifier.name = other_identifier.name
sub_identifier.expr = other_identifier.expr
break
return entity


@@ -0,0 +1,24 @@
from abc import ABC
from dbt.semantic.aggregation_properties import AggregationType
from dbt.contracts.graph.nodes import Entity
ONE = "1"
class ConvertCountToSum(ABC):
"""Converts any COUNT measures to SUM equivalent."""
@staticmethod
def _transform_entity(entity: Entity) -> Entity: # noqa: D
if entity.measures:
for measure in entity.measures:
if measure.agg == AggregationType.COUNT:
# NOTE: Removed the "raise if expr is None" error because the dbt metric design encourages
# counting directly on columns without requiring an expression. This makes it easier for users.
if measure.expr is None:
measure.expr = f"case when {measure.name} is not null then 1 else 0 end"
elif measure.expr != ONE:
# Just leave it as SUM(1) if we want to count all
measure.expr = f"case when {measure.expr} is not null then 1 else 0 end"
measure.agg = AggregationType.SUM
return entity
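
Similarly, a small stand-in sketch (hypothetical objects, not the real contracts) of what ConvertCountToSum does to a COUNT measure:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeCountMeasure:  # hypothetical stand-in for a measure with agg == COUNT
    name: str
    agg: str = "count"
    expr: Optional[str] = None

measure = FakeCountMeasure(name="transactions")
if measure.expr is None:
    # Count non-null values of the measure's own column, then aggregate with SUM.
    measure.expr = f"case when {measure.name} is not null then 1 else 0 end"
elif measure.expr != "1":
    measure.expr = f"case when {measure.expr} is not null then 1 else 0 end"
measure.agg = "sum"
print(measure.expr)  # case when transactions is not null then 1 else 0 end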


@@ -0,0 +1,39 @@
from abc import ABC
from dbt.semantic.aggregation_properties import AggregationType
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.measures import MeasureAggregationParameters
from dbt.exceptions import DbtSemanticValidationError
MEDIAN_PERCENTILE = 0.5
class ConvertMedianToPercentile(ABC):
"""Converts any MEDIAN measures to percentile equivalent."""
@staticmethod
def _transform_entity(entity: Entity) -> Entity: # noqa: D
if entity.measures:
for measure in entity.measures:
if measure.agg == AggregationType.MEDIAN:
measure.agg = AggregationType.PERCENTILE
if not measure.agg_params:
measure.agg_params = MeasureAggregationParameters()
else:
if (
measure.agg_params.percentile is not None
and measure.agg_params.percentile != 0.5
):
raise DbtSemanticValidationError(
f"Measure '{measure.name}' uses a MEDIAN aggregation, while percentile is set to "
f"'{measure.agg_params.percentile}', a conflicting value. Please remove the parameter "
"or set to '0.5'."
)
if measure.agg_params.use_discrete_percentile:
raise DbtSemanticValidationError(
f"Measure '{measure.name}' uses a MEDIAN aggregation, while use_discrete_percentile"
f"is set to true. Please remove the parameter or set to False."
)
measure.agg_params.percentile = MEDIAN_PERCENTILE
# let's not set use_approximate_percentile to be false due to valid performance reasons
return entity


@@ -0,0 +1,21 @@
from dbt.contracts.graph.nodes import Entity
from abc import ABC
class LowerCaseNames(ABC):
"""Lowercases the names of both top level objects and entity elements"""
@staticmethod
def _transform_entity(entity: Entity) -> Entity:
"""Lowercases the names of data source elements."""
entity.name = entity.name.lower()
if entity.measures:
for measure in entity.measures:
measure.name = measure.name.lower()
if entity.identifiers:
for identifier in entity.identifiers:
identifier.name = identifier.name.lower()
if entity.dimensions:
for dimension in entity.dimensions:
dimension.name = dimension.name.lower()
return entity


@@ -0,0 +1,35 @@
from abc import ABC
from typing import Optional
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.dimensions import DimensionType
from dbt.semantic.references import TimeDimensionReference
class SetMeasureAggregationTimeDimension(ABC):
"""Sets the aggregation time dimension for measures to the primary time dimension if not defined."""
@staticmethod
def _find_primary_time_dimension(entity: Entity) -> Optional[TimeDimensionReference]:
for dimension in entity.dimensions:
if (
dimension.type == DimensionType.TIME
and dimension.type_params
and dimension.type_params.is_primary
):
return dimension.time_dimension_reference
return None
@staticmethod
def _transform_entity(entity: Entity) -> Entity: # noqa: D
primary_time_dimension_reference = (
SetMeasureAggregationTimeDimension._find_primary_time_dimension(entity=entity)
)
if primary_time_dimension_reference:
if entity.measures:
for measure in entity.measures:
if not measure.agg_time_dimension:
measure.agg_time_dimension = primary_time_dimension_reference.name
return entity
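
As a rough illustration (again with a hypothetical stand-in rather than the real contracts), the rule backfills the aggregation time dimension only for measures that have not set one explicitly:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FakeMeasure:  # hypothetical stand-in
    name: str
    agg_time_dimension: Optional[str] = None

primary_time_dimension = "ds"  # what _find_primary_time_dimension would return
measures = [FakeMeasure("revenue"), FakeMeasure("orders", agg_time_dimension="ordered_at")]

for measure in measures:
    if not measure.agg_time_dimension:
        measure.agg_time_dimension = primary_time_dimension

print([m.agg_time_dimension for m in measures])  # ['ds', 'ordered_at']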


@@ -0,0 +1,98 @@
from dbt.contracts.graph.metrics import MetricType, MetricInputMeasure, MetricTypeParams
from dbt.node_types import NodeType
from dbt.contracts.graph.nodes import Metric, Entity
from dbt.clients.jinja import get_rendered
from abc import ABC
from typing import List
from dbt.context.providers import (
generate_parse_metrics,
)
class ProxyMeasure(ABC):
"""All the functionality needed to convert measures to metrics"""
@staticmethod
def _create_proxy_metrics(parser, parsed_entity: Entity, path: str, fqn: List):
if parsed_entity.measures:
for measure in parsed_entity.measures:
if measure.create_metric:
add_metric = True
package_name = parser.project.project_name
unique_id = f"{NodeType.Metric}.{package_name}.{measure.name}"
original_file_path = parser.yaml.path.original_file_path
fqn[2] = measure.name
# TODO: Figure out new location in validation
# if parsed_entity.metrics:
# breakpoint()
# for metric in parsed_entity.metrics:
# if metric == measure.name:
# if metric.type != MetricType.MEASURE_PROXY:
# raise DbtValidationError(
# f"Cannot have metric with the same name as a measure ({measure.name}) that is not a "
# f"proxy for that measure"
# )
# add_metric = False
config = parser._generate_proxy_metric_config(
target=measure,
fqn=fqn,
package_name=package_name,
rendered=True,
)
config = config.finalize_and_validate()
unrendered_config = parser._generate_proxy_metric_config(
target=measure,
fqn=fqn,
package_name=package_name,
rendered=False,
)
if measure.expr:
measure_expr = measure.expr
else:
measure_expr = measure.name
if add_metric:
proxy_metric = Metric(
resource_type=NodeType.Metric,
package_name=package_name,
path=path,
original_file_path=original_file_path,
unique_id=unique_id,
fqn=fqn,
name=measure.name,
constraint=None,
entity="entity('" + parsed_entity.name + "')",
description=measure.description,
type=MetricType.MEASURE_PROXY,
type_params=MetricTypeParams(
measure=MetricInputMeasure(name=measure.name),
expr=measure_expr,
),
meta=measure.meta,
tags=measure.tags,
config=config,
unrendered_config=unrendered_config,
)
proxy_ctx = generate_parse_metrics(
proxy_metric,
parser.root_project,
parser.schema_parser.manifest,
package_name,
)
if proxy_metric.entity is not None:
entity_ref = "{{ " + proxy_metric.entity + " }}"
get_rendered(entity_ref, proxy_ctx, proxy_metric)
if proxy_metric.config.enabled:
parser.manifest.add_metric(parser.yaml.file, proxy_metric)
else:
parser.manifest.add_disabled(parser.yaml.file, proxy_metric)
return parser


@@ -0,0 +1,42 @@
from typing import Set, List
from abc import ABC
from dbt.exceptions import DbtSemanticValidationError
from dbt.contracts.graph.metrics import MetricType, MetricInputMeasure
from dbt.contracts.graph.nodes import Metric
class AddInputMetricMeasures(ABC):
"""Add all measures corresponding to the input metrics of the derived metric."""
@staticmethod
def _get_measures_for_metric(
metric_name: str, metrics: List[Metric]
) -> Set[MetricInputMeasure]:
"""Returns a unique set of input measures for a given metric."""
measures = set()
metrics_generator = (metric for metric in metrics if metric.name == metric_name)
matched_metric = next(iter(metrics_generator), None)
if matched_metric:
if matched_metric.type == MetricType.DERIVED:
for input_metric in matched_metric.input_metrics:
measures.update(
AddInputMetricMeasures._get_measures_for_metric(input_metric.name, metrics)
)
else:
measures.update(set(matched_metric.input_measures))
else:
raise DbtSemanticValidationError(
f"Metric '{metric_name}' is not configured as a metric in the model."
)
return measures
@staticmethod
def add_input_metrics(metric: Metric, metrics: List[Metric]) -> Metric: # noqa: D
if metric.type == MetricType.DERIVED:
measures = AddInputMetricMeasures._get_measures_for_metric(metric.name, metrics)
if metric.type_params.measures is None:
raise DbtSemanticValidationError(
f"Metric '{metric.name}' is derived, which cannot have measures predefined in config."
)
metric.type_params.measures = list(measures)
return metric
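
The recursion above bottoms out at non-derived metrics. A self-contained sketch of the same idea with plain dicts (hypothetical data, not the real Metric objects):

from typing import Set

metrics = {
    "revenue": {"type": "simple", "input_measures": {"revenue_usd"}},
    "orders": {"type": "simple", "input_measures": {"order_count"}},
    "revenue_per_order": {"type": "derived", "inputs": ["revenue", "orders"]},
}

def measures_for(name: str) -> Set[str]:
    metric = metrics[name]
    if metric["type"] == "derived":
        resolved: Set[str] = set()
        for child in metric["inputs"]:
            resolved |= measures_for(child)
        return resolved
    return set(metric["input_measures"])

print(measures_for("revenue_per_order"))  # {'revenue_usd', 'order_count'} (order may vary)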


@@ -0,0 +1,98 @@
from typing import Union, List
from abc import ABC
from dbt.contracts.graph.metrics import (
UnparsedMetricInputMeasure,
MetricInputMeasure,
MetricTimeWindow,
UnparsedMetricInput,
MetricInput,
UnparsedMetricTypeParams,
MetricTypeParams,
)
from dbt.semantic.constraints import WhereClauseConstraint
class ConvertTypeParams(ABC):
"""All the functionality needed to convert UnparsedMetricTypeParams to MetricTypeParams"""
@staticmethod
def _get_parameter(
parameter: Union[UnparsedMetricInputMeasure, str, None]
) -> MetricInputMeasure:
if isinstance(parameter, str):
return MetricInputMeasure(name=parameter)
elif isinstance(parameter, UnparsedMetricInputMeasure):
return MetricInputMeasure(
name=parameter.name,
constraint=WhereClauseConstraint.parse(parameter.constraint),
alias=parameter.alias,
)
@staticmethod
def _get_parameters(
parameters: List[Union[UnparsedMetricInputMeasure, str]]
) -> List[MetricInputMeasure]:
parameters_list = []
if parameters:
for parameter in parameters:
if isinstance(parameter, str):
parameters_list.append(MetricInputMeasure(name=parameter))
elif isinstance(parameter, UnparsedMetricInputMeasure):
parameters_list.append(
MetricInputMeasure(
name=parameter.name,
constraint=WhereClauseConstraint.parse(parameter.constraint),
alias=parameter.alias,
)
)
return parameters_list
else:
return []
@staticmethod
def _get_window_parameter(parameter: Union[MetricTimeWindow, str, None]):
if isinstance(parameter, str):
return MetricTimeWindow.parse(window=parameter)
elif isinstance(parameter, MetricTimeWindow):
return parameter
else:
return None
@staticmethod
def _get_metric_parameters(
parameters: List[Union[UnparsedMetricInput, str]]
) -> List[MetricInput]:
parameters_list = []
if parameters:
for parameter in parameters:
if isinstance(parameter, str):
parameters_list.append(MetricInput(name=parameter))
elif isinstance(parameter, UnparsedMetricInput):
parameters_list.append(
MetricInput(
name=parameter.name,
constraint=parameter.constraint,
alias=parameter.alias,
offset_window=parameter.offset_window,
offset_to_grain=parameter.offset_to_grain,
)
)
return parameters_list
else:
return []
@staticmethod
def _get_metric_type_params(type_params: UnparsedMetricTypeParams) -> MetricTypeParams:
parsed_type_params = MetricTypeParams(
measure=ConvertTypeParams._get_parameter(type_params.measure),
measures=ConvertTypeParams._get_parameters(type_params.measures),
numerator=ConvertTypeParams._get_parameter(type_params.numerator),
denominator=ConvertTypeParams._get_parameter(type_params.denominator),
expr=type_params.expr,
window=ConvertTypeParams._get_window_parameter(type_params.window),
grain_to_date=type_params.grain_to_date,
metrics=ConvertTypeParams._get_metric_parameters(type_params.metrics),
)
return parsed_type_params
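
The helpers above all follow the same "plain string or structured object" coercion pattern. A stand-in sketch of that pattern (hypothetical classes, not the real contracts):

from dataclasses import dataclass
from typing import Optional, Union

@dataclass
class FakeUnparsedInput:  # stands in for UnparsedMetricInputMeasure
    name: str
    alias: Optional[str] = None

@dataclass
class FakeParsedInput:  # stands in for MetricInputMeasure
    name: str
    alias: Optional[str] = None

def coerce(raw: Union[str, FakeUnparsedInput]) -> FakeParsedInput:
    # A bare string is shorthand for "just the name"; an object carries extra fields.
    if isinstance(raw, str):
        return FakeParsedInput(name=raw)
    return FakeParsedInput(name=raw.name, alias=raw.alias)

print(coerce("revenue"))
print(coerce(FakeUnparsedInput("revenue", alias="rev")))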


@@ -0,0 +1,15 @@
from typing import List
from dataclasses import dataclass, field
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.nodes import Entity, Metric
@dataclass
class UserConfiguredModel(dbtClassMixin):
"""Model holds all the information the SemanticLayer needs to render a query"""
entities: List[Entity] = field(default_factory=list)
metrics: List[Metric] = field(default_factory=list)
def _serialize(self):
return self.to_dict()
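
A minimal sketch of constructing the container that every validation rule below consumes (import path taken from the validation modules in this change):

from dbt.semantic.user_configured_model import UserConfiguredModel

model = UserConfiguredModel()  # defaults: no entities, no metrics
assert model.entities == [] and model.metrics == []
print(model._serialize())      # roughly {'entities': [], 'metrics': []}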


@@ -0,0 +1,61 @@
from typing import List
from dbt.semantic.references import EntityElementReference, TimeDimensionReference
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.dimensions import DimensionType
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityElementContext,
EntityElementType,
ModelValidationRule,
ValidationIssueType,
ValidationError,
)
class AggregationTimeDimensionRule(ModelValidationRule):
"""Checks that the aggregation time dimension for a measure points to a valid time dimension in the entity."""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues.extend(AggregationTimeDimensionRule._validate_entity(entity))
return issues
@staticmethod
def _time_dimension_in_model(
time_dimension_reference: TimeDimensionReference, entity: Entity
) -> bool:
for dimension in entity.dimensions:
if (
dimension.type == DimensionType.TIME
and dimension.name == time_dimension_reference.name
):
return True
return False
@staticmethod
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
for measure in entity.measures:
measure_context = EntityElementContext(
entity_element=EntityElementReference(entity_name=entity.name, name=measure.name),
element_type=EntityElementType.MEASURE,
)
agg_time_dimension_reference = measure.checked_agg_time_dimension
if not AggregationTimeDimensionRule._time_dimension_in_model(
time_dimension_reference=agg_time_dimension_reference, entity=entity
):
issues.append(
ValidationError(
context=measure_context,
message=f"In entity '{entity.name}', measure '{measure.name}' has the aggregation "
f"time dimension set to '{agg_time_dimension_reference.name}', "
f"which is not a valid time dimension in the entity",
)
)
return issues


@@ -0,0 +1,78 @@
from typing import Dict, List, Set
from dbt.semantic.references import EntityElementReference, IdentifierReference
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.identifiers import Identifier
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityElementContext,
EntityElementType,
ModelValidationRule,
ValidationWarning,
validate_safely,
ValidationIssueType,
)
class CommonIdentifiersRule(ModelValidationRule):
"""Checks that identifiers exist on more than one entity"""
@staticmethod
def _map_entity_identifiers(entities: List[Entity]) -> Dict[IdentifierReference, Set[str]]:
"""Generate mapping of identifier names to the set of entities where it is defined"""
identifiers_to_entities: Dict[IdentifierReference, Set[str]] = {}
for entity in entities or []:
for identifier in entity.identifiers or []:
if identifier.reference in identifiers_to_entities:
identifiers_to_entities[identifier.reference].add(entity.name)
else:
identifiers_to_entities[identifier.reference] = {entity.name}
return identifiers_to_entities
@staticmethod
@validate_safely(whats_being_done="checking identifier exists on more than one entity")
def _check_identifier(
identifier: Identifier,
entity: Entity,
identifiers_to_entities: Dict[IdentifierReference, Set[str]],
) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
# If the identifier is in the dict and the set of entities minus this entity is empty,
# then we warn the user that their identifier will be unused in joins
if (
identifier.reference in identifiers_to_entities
and len(identifiers_to_entities[identifier.reference].difference({entity.name})) == 0
):
issues.append(
ValidationWarning(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=identifier.name
),
element_type=EntityElementType.IDENTIFIER,
),
message=f"Identifier `{identifier.reference.name}` "
f"only found in one entity `{entity.name}` "
f"which means it will be unused in joins.",
)
)
return issues
@staticmethod
@validate_safely(
whats_being_done="running model validation warning if identifiers are only one one entity"
)
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Issues a warning for any identifier that is associated with only one entity"""
issues = []
identifiers_to_entities = CommonIdentifiersRule._map_entity_identifiers(model.entities)
for entity in model.entities or []:
for identifier in entity.identifiers or []:
issues += CommonIdentifiersRule._check_identifier(
identifier=identifier,
entity=entity,
identifiers_to_entities=identifiers_to_entities,
)
return issues
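
A hypothetical sketch of the mapping that _map_entity_identifiers builds, using plain strings instead of Identifier and Entity objects:

from collections import defaultdict
from typing import DefaultDict, Set

entity_identifiers = {
    "orders": ["customer_id", "order_id"],
    "customers": ["customer_id"],
}

identifiers_to_entities: DefaultDict[str, Set[str]] = defaultdict(set)
for entity_name, identifier_names in entity_identifiers.items():
    for identifier_name in identifier_names:
        identifiers_to_entities[identifier_name].add(entity_name)

# order_id appears on only one entity, so the rule would warn that it is unused in joins.
unused_in_joins = {name for name, owners in identifiers_to_entities.items() if len(owners) == 1}
print(unused_in_joins)  # {'order_id'}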


@@ -0,0 +1,148 @@
from typing import Dict, List
from dbt.semantic.references import EntityElementReference, DimensionReference
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.dimensions import Dimension, DimensionType
from dbt.semantic.validations.validator_helpers import (
EntityElementContext,
EntityElementType,
ModelValidationRule,
DimensionInvariants,
ValidationIssueType,
ValidationError,
)
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.time import TimeGranularity
class DimensionConsistencyRule(ModelValidationRule):
"""Checks for consistent dimension properties in the entitys in a model.
* Dimensions with the same name should be of the same type.
* Dimensions with the same name should be either all partitions or not.
"""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
dimension_to_invariant: Dict[DimensionReference, DimensionInvariants] = {}
time_dims_to_granularity: Dict[DimensionReference, TimeGranularity] = {}
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues += DimensionConsistencyRule._validate_entity(
entity=entity,
dimension_to_invariant=dimension_to_invariant,
update_invariant_dict=True,
)
for dimension in entity.dimensions:
issues += DimensionConsistencyRule._validate_dimension(
dimension=dimension,
time_dims_to_granularity=time_dims_to_granularity,
entity=entity,
)
return issues
@staticmethod
def _validate_dimension(
dimension: Dimension,
time_dims_to_granularity: Dict[DimensionReference, TimeGranularity],
entity: Entity,
) -> List[ValidationIssueType]:
"""Checks that time dimensions of the same name that aren't primary have the same time granularity specifications
Args:
dimension: the dimension to check
time_dims_to_granularity: a dict from the dimension to the time granularity it should have
entity: the associated entity. Used for generated issue messages
"""
issues: List[ValidationIssueType] = []
context = EntityElementContext(
entity_element=EntityElementReference(entity_name=entity.name, name=dimension.name),
element_type=EntityElementType.DIMENSION,
)
if dimension.type == DimensionType.TIME:
if dimension.reference not in time_dims_to_granularity and dimension.type_params:
time_dims_to_granularity[
dimension.reference
] = dimension.type_params.time_granularity
# The primary time dimension can be of different time granularities, so don't check for it.
if (
dimension.type_params is not None
and not dimension.type_params.is_primary
and dimension.type_params.time_granularity
!= time_dims_to_granularity[dimension.reference]
):
expected_granularity = time_dims_to_granularity[dimension.reference]
issues.append(
ValidationError(
context=context,
message=f"Time granularity must be the same for time dimensions with the same name. "
f"Problematic dimension: {dimension.name} in entity with name: "
f"`{entity.name}`. Expected granularity is {expected_granularity.name}.",
)
)
return issues
@staticmethod
def _validate_entity(
entity: Entity,
dimension_to_invariant: Dict[DimensionReference, DimensionInvariants],
update_invariant_dict: bool,
) -> List[ValidationIssueType]:
"""Checks that the given entity has dimensions consistent with the given invariants.
Args:
entity: the entity to check
dimension_to_invariant: a dict from the dimension name to the properties it should have
update_invariant_dict: whether to insert an entry into the dict if the given dimension name doesn't exist.
"""
issues: List[ValidationIssueType] = []
for dimension in entity.dimensions:
dimension_invariant = dimension_to_invariant.get(dimension.reference)
if dimension_invariant is None:
if update_invariant_dict:
dimension_invariant = DimensionInvariants(
dimension.type, dimension.is_partition or False
)
dimension_to_invariant[dimension.reference] = dimension_invariant
continue
# TODO: Can't check for unknown dimensions easily as the name follows <id>__<name> format.
# e.g. user__created_at
continue
# is_partition might not be specified in the configs, so default to False.
is_partition = dimension.is_partition or False
context = EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=dimension.name
),
element_type=EntityElementType.DIMENSION,
)
if dimension_invariant.type != dimension.type:
issues.append(
ValidationError(
context=context,
message=f"In entity `{entity.name}`, type conflict for dimension `{dimension.name}` "
f"- already in model as type `{dimension_invariant.type}` but got `{dimension.type}`",
)
)
if dimension_invariant.is_partition != is_partition:
issues.append(
ValidationError(
context=context,
message=f"In entity `{entity.name}, conflicting is_partition attribute for dimension "
f"`{dimension.reference}` - already in model"
f" with is_partition as `{dimension_invariant.is_partition}` but got "
f"`{is_partition}``",
)
)
return issues


@@ -0,0 +1,79 @@
from collections import defaultdict
from typing import List, DefaultDict
from dbt.semantic.references import EntityReference
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityContext,
EntityElementType,
ModelValidationRule,
ValidationError,
ValidationIssueType,
)
class ElementConsistencyRule(ModelValidationRule):
"""Checks that elements in data sources with the same name are of the same element type across the model
This reduces the potential confusion that might arise from having an identifier named `country` and a dimension
named `country` while allowing for things like the `user` identifier to exist in multiple data sources. Note not
all element types allow duplicates, and there are separate validation rules for those cases. See, for example,
the DataSourceMeasuresUniqueRule.
"""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues = []
name_to_types = ElementConsistencyRule._get_name_to_types(model=model)
invalid_elements = {
name: type_mapping
for name, type_mapping in name_to_types.items()
if len(type_mapping) > 1
}
for name, type_to_context in invalid_elements.items():
# Sort these by value to ensure consistent error messaging
types_used = [
EntityElementType(v) for v in sorted(k.value for k in type_to_context.keys())
]
value_types_used = [type.value for type in types_used]
for element_type in types_used:
entity_contexts = type_to_context[element_type]
entity_names = {ctx.entity.entity_name for ctx in entity_contexts}
entity_context = entity_contexts[0]
issues.append(
ValidationError(
context=entity_context,
message=f"In data sources {entity_names}, element `{name}` is of type "
f"{element_type.value}, but it is used as types {value_types_used} across the model.",
)
)
return issues
@staticmethod
def _get_name_to_types(
model: UserConfiguredModel,
) -> DefaultDict[str, DefaultDict[EntityElementType, List[EntityContext]]]:
"""Create a mapping of all element names in the model to types with a list of associated EntityContexts"""
element_types: DefaultDict[
str, DefaultDict[EntityElementType, List[EntityContext]]
] = defaultdict(lambda: defaultdict(list))
for entity in model.entities:
entity_context = EntityContext(
entity=EntityReference(entity_name=entity.name),
)
if entity.measures:
for measure in entity.measures:
element_types[measure.name][EntityElementType.MEASURE].append(entity_context)
if entity.dimensions:
for dimension in entity.dimensions:
element_types[dimension.name][EntityElementType.DIMENSION].append(
entity_context
)
if entity.identifiers:
for identifier in entity.identifiers:
element_types[identifier.name][EntityElementType.IDENTIFIER].append(
entity_context
)
return element_types


@@ -0,0 +1,216 @@
from typing import List
from dbt.semantic.references import EntityElementReference, EntityReference
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.dimensions import DimensionType
from dbt.contracts.graph.identifiers import IdentifierType
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityContext,
EntityElementContext,
EntityElementType,
ModelValidationRule,
ValidationIssueType,
ValidationError,
)
from dbt.semantic.time import SUPPORTED_GRANULARITIES
class EntityTimeDimensionWarningsRule(ModelValidationRule):
"""Checks time dimensions in entities."""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues.extend(EntityTimeDimensionWarningsRule._validate_entity(entity=entity))
return issues
@staticmethod
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
primary_time_dimensions = []
for dim in entity.dimensions:
context = EntityElementContext(
entity_element=EntityElementReference(entity_name=entity.name, name=dim.name),
element_type=EntityElementType.DIMENSION,
)
if dim.type == DimensionType.TIME:
if dim.type_params is None:
continue
elif dim.type_params.is_primary:
primary_time_dimensions.append(dim)
elif dim.type_params.time_granularity:
if dim.type_params.time_granularity not in SUPPORTED_GRANULARITIES:
issues.append(
ValidationError(
context=context,
message=f"Unsupported time granularity in time dimension with name: {dim.name}, "
f"Please use {[s.value for s in SUPPORTED_GRANULARITIES]}",
)
)
# An entity must have a primary time dimension if it has
# any measures that don't have an `agg_time_dimension` set
if (
len(primary_time_dimensions) == 0
and len(entity.measures) > 0
and any(measure.agg_time_dimension is None for measure in entity.measures)
):
issues.append(
ValidationError(
context=EntityContext(
entity=EntityReference(entity_name=entity.name),
),
message=f"No primary time dimension in entity with name ({entity.name}). Please add one",
)
)
if len(primary_time_dimensions) > 1:
for primary_time_dimension in primary_time_dimensions:
issues.append(
ValidationError(
context=EntityContext(
entity=EntityReference(entity_name=entity.name),
),
message=f"In entity {entity.name}, "
f"Primary time dimension with name: {primary_time_dimension.name} "
f"is one of many defined as primary.",
)
)
return issues
class EntityValidityWindowRule(ModelValidationRule):
"""Checks validity windows in entitys to ensure they comply with runtime requirements"""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Checks the validity param definitions in every entity in the model"""
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues.extend(EntityValidityWindowRule._validate_entity(entity=entity))
return issues
@staticmethod
def _validate_entity(entity: Entity) -> List[ValidationIssueType]:
"""Runs assertions on entities with validity parameters set on one or more time dimensions"""
issues: List[ValidationIssueType] = []
validity_param_dims = [dim for dim in entity.dimensions if dim.validity_params is not None]
if not validity_param_dims:
return issues
context = EntityContext(
entity=EntityReference(entity_name=entity.name),
)
requirements = (
"Data sources using dimension validity params to define a validity window must have exactly two time "
"dimensions with validity params specified - one marked `is_start` and the other marked `is_end`."
)
validity_param_dimension_names = [dim.name for dim in validity_param_dims]
start_dim_names = [
dim.name
for dim in validity_param_dims
if dim.validity_params and dim.validity_params.is_start
]
end_dim_names = [
dim.name
for dim in validity_param_dims
if dim.validity_params and dim.validity_params.is_end
]
num_start_dims = len(start_dim_names)
num_end_dims = len(end_dim_names)
if len(validity_param_dims) == 1 and num_start_dims == 1 and num_end_dims == 1:
# Defining a single point window, such as one might find in a daily snapshot table keyed on date,
# is not currently supported.
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has a single validity param dimension that defines its window: "
f"`{validity_param_dimension_names[0]}`. This is not a currently supported configuration! "
f"{requirements} If you have one column defining a window, as in a daily snapshot table, you can "
f"define a separate dimension and increment the time value in the `expr` field as a work-around."
),
)
issues.append(error)
elif len(validity_param_dims) != 2:
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has {len(validity_param_dims)} dimensions defined with validity "
f"params. They are: {validity_param_dimension_names}. There must be either zero or two! "
f"If you wish to define a validity window for this entity, please follow these requirements: "
f"{requirements}"
),
)
issues.append(error)
elif num_start_dims != 1 or num_end_dims != 1:
# Validity windows must define both a start and an end, and there should be exactly one
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has two validity param dimensions defined, but does not have "
f"exactly one each marked with is_start and is_end! Dimensions: {validity_param_dimension_names}. "
f"is_start dimensions: {start_dim_names}. is_end dimensions: {end_dim_names}. {requirements}"
),
)
issues.append(error)
primary_or_unique_identifiers = [
identifier
for identifier in entity.identifiers
if identifier.type in (IdentifierType.PRIMARY, IdentifierType.UNIQUE)
]
if not any(
[identifier.type is IdentifierType.NATURAL for identifier in entity.identifiers]
):
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has validity param dimensions defined, but does not have an "
f"identifier with type `natural` set. The natural key for this entity is what we use to "
f"process a validity window join. Primary or unique identifiers, if any, might be suitable for "
f"use as natural keys: ({[identifier.name for identifier in primary_or_unique_identifiers]})."
),
)
issues.append(error)
if primary_or_unique_identifiers:
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has validity param dimensions defined and also has one or more "
f"identifiers designated as `primary` or `unique`. This is not yet supported, as we do not "
f"currently process joins against these key types for entitys with validity windows "
f"specified."
),
)
issues.append(error)
if entity.measures:
# Temporarily block measure definitions in entities with validity windows set
measure_names = [measure.name for measure in entity.measures]
error = ValidationError(
context=context,
message=(
f"Data source {entity.name} has both measures and validity param dimensions defined. This "
f"is not currently supported! Please remove either the measures or the validity params. "
f"Measure names: {measure_names}. Validity param dimension names: "
f"{validity_param_dimension_names}."
),
)
issues.append(error)
return issues


@@ -0,0 +1,257 @@
from collections import defaultdict
from dataclasses import dataclass
from typing import List, MutableSet, Tuple, Sequence, DefaultDict
from dbt.contracts.graph.nodes import Entity
from dbt.contracts.graph.identifiers import Identifier, IdentifierType, CompositeSubIdentifier
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityContext,
EntityElementContext,
EntityElementType,
ModelValidationRule,
ValidationError,
ValidationIssueType,
ValidationWarning,
iter_flatten,
)
from dbt.semantic.references import IdentifierReference, EntityElementReference, EntityReference
class IdentifierConfigRule(ModelValidationRule):
"""Checks that entity identifiers are valid"""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues = []
for entity in model.entities:
issues += IdentifierConfigRule._validate_entity_identifiers(entity=entity)
return issues
@staticmethod
def _validate_entity_identifiers(entity: Entity) -> List[ValidationIssueType]:
"""Checks validity of composite identifiers"""
issues: List[ValidationIssueType] = []
for ident in entity.identifiers:
if ident.identifiers:
context = EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=ident.name
),
element_type=EntityElementType.IDENTIFIER,
)
for sub_id in ident.identifiers:
if sub_id.ref and (sub_id.name or sub_id.expr):
issues.append(
ValidationError(
context=context,
message=f"Both ref and name/expr set in sub identifier of identifier "
f"({ident.name}), please set one",
)
)
elif sub_id.ref is not None and sub_id.ref not in [
i.name for i in entity.identifiers
]:
issues.append(
ValidationError(
context=context,
message=f"Identifier ref must reference an existing identifier by name. "
f"No identifier in this entity has name: {sub_id.ref}",
)
)
elif not sub_id.ref and not sub_id.name:
issues.append(
ValidationError(
context=context,
message=f"Must provide either name or ref for sub identifier of identifier "
f"with name: {ident.reference.name}",
)
)
if sub_id.name:
for i in entity.identifiers:
if i.name == sub_id.name and i.expr != sub_id.expr:
issues.append(
ValidationError(
context=context,
message=f"If sub identifier has same name ({sub_id.name}) "
f"as an existing Identifier they must have the same expr",
)
)
break
return issues
class NaturalIdentifierConfigurationRule(ModelValidationRule):
"""Ensures that identifiers marked as IdentifierType.NATURAL are configured correctly"""
@staticmethod
def _validate_entity_natural_identifiers(entity: Entity) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
context = EntityContext(
entity=EntityReference(entity_name=entity.name),
)
natural_identifier_names = set(
[
identifier.name
for identifier in entity.identifiers
if identifier.type is IdentifierType.NATURAL
]
)
if len(natural_identifier_names) > 1:
error = ValidationError(
context=context,
message=f"Entities can have at most one natural identifier, but entity "
f"`{entity.name}` has {len(natural_identifier_names)} distinct natural identifiers set! "
f"{natural_identifier_names}.",
)
issues.append(error)
if natural_identifier_names and not [
dim for dim in entity.dimensions if dim.validity_params
]:
error = ValidationError(
context=context,
message=f"The use of `natural` identifiers is currently supported only in conjunction with a validity "
f"window defined in the set of time dimensions associated with the entity. entity "
f"`{entity.name}` uses a natural identifier ({natural_identifier_names}) but does not define a "
f"validity window!",
)
issues.append(error)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Validate identifiers marked as IdentifierType.NATURAL"""
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues += NaturalIdentifierConfigurationRule._validate_entity_natural_identifiers(
entity=entity
)
return issues
class OnePrimaryIdentifierPerEntityRule(ModelValidationRule):
"""Ensures that each entity has only one primary identifier"""
@staticmethod
def _only_one_primary_identifier(entity: Entity) -> List[ValidationIssueType]:
primary_identifier_names: MutableSet[str] = set()
for identifier in entity.identifiers or []:
if identifier.type == IdentifierType.PRIMARY:
primary_identifier_names.add(identifier.reference.name)
if len(primary_identifier_names) > 1:
return [
ValidationError(
message=f"Entities can have only one primary identifier. The entity"
f" `{entity.name}` has {len(primary_identifier_names)}: {', '.join(primary_identifier_names)}",
context=EntityContext(
entity=EntityReference(entity_name=entity.name),
),
)
]
return []
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues = []
for entity in model.entities:
issues += OnePrimaryIdentifierPerEntityRule._only_one_primary_identifier(entity=entity)
return issues
@dataclass(frozen=True)
class SubIdentifierContext:
"""Organizes the context behind identifiers and their sub-identifiers."""
entity: Entity
identifier_reference: IdentifierReference
sub_identifier_names: Tuple[str, ...]
class IdentifierConsistencyRule(ModelValidationRule):
"""Checks identifiers with the same name are defined with the same set of sub-identifiers in all entitys"""
@staticmethod
def _get_sub_identifier_names(identifier: Identifier) -> Sequence[str]:
sub_identifier_names = []
sub_identifier: CompositeSubIdentifier
for sub_identifier in identifier.identifiers or []:
if sub_identifier.name:
sub_identifier_names.append(sub_identifier.name)
elif sub_identifier.ref:
sub_identifier_names.append(sub_identifier.ref)
return sub_identifier_names
@staticmethod
def _get_sub_identifier_context(entity: Entity) -> Sequence[SubIdentifierContext]:
contexts = []
for identifier in entity.identifiers or []:
contexts.append(
SubIdentifierContext(
entity=entity,
identifier_reference=identifier.reference,
sub_identifier_names=tuple(
IdentifierConsistencyRule._get_sub_identifier_names(identifier)
),
)
)
return contexts
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
# build collection of sub-identifier contexts, keyed by identifier name
identifier_to_sub_identifier_contexts: DefaultDict[
str, List[SubIdentifierContext]
] = defaultdict(list)
all_contexts: List[SubIdentifierContext] = list(
iter_flatten(
[
IdentifierConsistencyRule._get_sub_identifier_context(entity)
for entity in model.entities
]
)
)
for context in all_contexts:
identifier_to_sub_identifier_contexts[context.identifier_reference.name].append(
context
)
# Filter out anything that has fewer than 2 distinct sub-identifier sets
invalid_sub_identifier_configurations = dict(
filter(
lambda item: len(set([context.sub_identifier_names for context in item[1]])) >= 2,
identifier_to_sub_identifier_contexts.items(),
)
)
# convert each invalid identifier configuration into a validation warning
for (
identifier_name,
sub_identifier_contexts,
) in invalid_sub_identifier_configurations.items():
entity = sub_identifier_contexts[0].entity
issues.append(
ValidationWarning(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=identifier_name
),
element_type=EntityElementType.IDENTIFIER,
),
message=(
f"Identifier '{identifier_name}' does not have consistent sub-identifiers "
f"throughout the model: {list(sorted(sub_identifier_contexts, key=lambda x: x.sub_identifier_names))}"
),
)
)
return issues
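
A stand-in sketch of the consistency check itself: identifiers whose observed sub-identifier tuples differ across entities are flagged (hypothetical data, not the real objects):

from collections import defaultdict
from typing import DefaultDict, List, Set, Tuple

# (identifier name, sub-identifier names) pairs as they might be observed per entity
observed: List[Tuple[str, Tuple[str, ...]]] = [
    ("user_team", ("user_id", "team_id")),  # entity A
    ("user_team", ("user_id",)),            # entity B defines different sub-identifiers
    ("order", ("order_id",)),
]

variants_by_name: DefaultDict[str, Set[Tuple[str, ...]]] = defaultdict(set)
for name, sub_names in observed:
    variants_by_name[name].add(sub_names)

inconsistent = {name for name, variants in variants_by_name.items() if len(variants) >= 2}
print(inconsistent)  # {'user_team'}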


@@ -0,0 +1,454 @@
from collections import defaultdict
from typing import DefaultDict, Dict, List, Set
from dbt.semantic.aggregation_properties import AggregationType
from dbt.semantic.references import MetricModelReference, MeasureReference
from dbt.contracts.graph.dimensions import DimensionType
from dbt.contracts.graph.metrics import MetricType
from dbt.contracts.graph.nodes import Metric
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
from dbt.semantic.validations.validator_helpers import (
EntityElementContext,
EntityElementReference,
EntityElementType,
MetricContext,
ModelValidationRule,
ValidationIssueType,
ValidationError,
ValidationWarning,
iter_bucket,
)
class MeasureMetricProxyUniqueRule(ModelValidationRule):
"""Asserts that measure names and metric names don't match unless measure proxy"""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
metric_names = [metric.name for metric in model.metrics]
for entity in model.entities:
for measure in entity.measures:
if measure.name in metric_names:
measure_name_match_index = next(
(
i
for i, metric in enumerate(model.metrics)
if metric.name == measure.name
),
None,
)
if measure_name_match_index is not None:
if (
model.metrics[measure_name_match_index].type
!= MetricType.MEASURE_PROXY
):
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=f"Cannot have metric with the same name as a measure ({measure.name}) that is not a "
f"proxy for that measure",
)
)
return issues
class EntityMeasuresUniqueRule(ModelValidationRule):
"""Asserts all measure names are unique across the model."""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
measure_references_to_entities: Dict[MeasureReference, List] = defaultdict(list)
for entity in model.entities:
for measure in entity.measures:
if measure.reference in measure_references_to_entities:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=f"Found measure with name {measure.name} in multiple entitys with names "
f"({measure_references_to_entities[measure.reference]})",
)
)
measure_references_to_entities[measure.reference].append(entity.name)
return issues
class MeasureConstraintAliasesRule(ModelValidationRule):
"""Checks that aliases are configured correctly for constrained measure references
These are, currently, only applicable for Metric types, since the MetricInputMeasure is only
used in metric type params at this time.
"""
@staticmethod
def _validate_required_aliases_are_set(
metric: Metric, metric_context: MetricContext
) -> List[ValidationIssueType]:
"""Checks if valid aliases are set on the input measure references where they are required
Aliases are required whenever there are 2 or more input measures with the same measure
reference with different constraints. When this happens, we require aliases for all
constrained measures for the sake of clarity. Any unconstrained measure does not
need an alias, since it always relies on the original measure specification.
At this time aliases are required for ratio metrics, but eventually we could relax that requirement
if we can find an automatic aliasing scheme for numerator/denominator that we feel comfortable using.
"""
issues: List[ValidationIssueType] = []
if len(metric.measure_references) == len(set(metric.measure_references)):
# All measure references are unique, so disambiguation via aliasing is not necessary
return issues
# Note: iter_bucket (like more_itertools.bucket) does not produce empty groups
input_measures_by_name = iter_bucket(metric.input_measures, lambda x: x.name)
for name in input_measures_by_name:
input_measures = list(input_measures_by_name[name])
if len(input_measures) == 1:
continue
distinct_input_measures = set(input_measures)
if len(distinct_input_measures) == 1:
# Warn whenever multiple identical references exist - we will consolidate these but it might be
# a meaningful oversight if constraints and aliases are specified
issues.append(
ValidationWarning(
context=metric_context,
message=(
f"Metric {metric.name} has multiple identical input measures specifications for measure "
f"{name}. This might be hiding a semantic error. Input measure specification: "
f"{input_measures[0]}."
),
)
)
continue
constrained_measures_without_aliases = [
measure
for measure in input_measures
if measure.constraint is not None and measure.alias is None
]
if constrained_measures_without_aliases:
issues.append(
ValidationError(
context=metric_context,
message=(
f"Metric {metric.name} depends on multiple different constrained versions of measure "
f"{name}. In such cases, aliases must be provided, but the following input measures have "
f"constraints specified without an alias: {constrained_measures_without_aliases}."
),
)
)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Ensures measures that might need an alias have one set, and that the alias is distinct
We do not allow aliases to collide with other aliases or measure names, since that could create
ambiguity at query time or cause issues if users ever restructure their models.
"""
issues: List[ValidationIssueType] = []
measure_names = _get_measure_names_from_model(model)
measure_alias_to_metrics: DefaultDict[str, List[str]] = defaultdict(list)
for metric in model.metrics:
metric_context = MetricContext(
metric=MetricModelReference(metric_name=metric.name),
)
issues += MeasureConstraintAliasesRule._validate_required_aliases_are_set(
metric=metric, metric_context=metric_context
)
aliased_measures = [
input_measure
for input_measure in metric.input_measures
if input_measure.alias is not None
]
for measure in aliased_measures:
assert (
measure.alias
), "Type refinement assertion, previous filter should ensure this is true"
issues += UniqueAndValidNameRule.check_valid_name(measure.alias)
if measure.alias in measure_names:
issues.append(
ValidationError(
context=metric_context,
message=(
f"Alias `{measure.alias}` for measure `{measure.name}` conflicts with measure names "
f"defined elsewhere in the model! This can cause ambiguity for certain types of "
f"query. Please choose another alias."
),
)
)
if measure.alias in measure_alias_to_metrics:
issues.append(
ValidationError(
context=metric_context,
message=(
f"Measure alias {measure.alias} conflicts with a measure alias used elsewhere in the "
f"model! This can cause ambiguity for certain types of query. Please choose another "
f"alias, or, if the measures are constrained in the same way, consider centralizing "
f"that definition in a new entity. Measure specification: {measure}. Existing "
f"metrics with that measure alias used: {measure_alias_to_metrics[measure.alias]}"
),
)
)
measure_alias_to_metrics[measure.alias].append(metric.name)
return issues
class MetricMeasuresRule(ModelValidationRule):
"""Checks that the measures referenced in the metrics exist."""
@staticmethod
def _validate_metric_measure_references(
metric: Metric, valid_measure_names: Set[str]
) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
for measure_reference in metric.measure_references:
if measure_reference.name not in valid_measure_names:
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message=(
f"Measure {measure_reference.name} referenced in metric {metric.name} is not "
f"defined in the model!"
),
)
)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
valid_measure_names = _get_measure_names_from_model(model)
for metric in model.metrics or []:
issues += MetricMeasuresRule._validate_metric_measure_references(
metric=metric, valid_measure_names=valid_measure_names
)
return issues
class MeasuresNonAdditiveDimensionRule(ModelValidationRule):
"""Checks that the measure's non_additive_dimensions are properly defined."""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
for entity in model.entities or []:
for measure in entity.measures:
non_additive_dimension = measure.non_additive_dimension
if non_additive_dimension is None:
continue
agg_time_dimension = next(
(
dim
for dim in entity.dimensions
if measure.checked_agg_time_dimension.name == dim.name
),
None,
)
if agg_time_dimension is None:
# Sanity check, should never hit this
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a agg_time_dimension of {measure.checked_agg_time_dimension.name} "
f"that is not defined as a dimension in entity '{entity.name}'."
),
)
)
continue
# Validates that the non_additive_dimension exists as a time dimension in the entity
matching_dimension = next(
(dim for dim in entity.dimensions if non_additive_dimension.name == dim.name),
None,
)
if matching_dimension is None:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' "
f"that is not defined as a dimension in entity '{entity.name}'."
),
)
)
if matching_dimension:
# Check that it's a time dimension
if matching_dimension.type != DimensionType.TIME:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' "
f"that is defined as a categorical dimension which is not supported."
),
)
)
# Validates that the non_additive_dimension time_granularity matches the agg_time_dimension time_granularity
if (
matching_dimension.type_params
and agg_time_dimension.type_params
and (
matching_dimension.type_params.time_granularity
!= agg_time_dimension.type_params.time_granularity
)
):
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a non_additive_dimension with name '{non_additive_dimension.name}' that has "
f"a base time granularity ({matching_dimension.type_params.time_granularity.name}) that is not equal to the measure's "
f"agg_time_dimension {agg_time_dimension.name} with a base granularity of ({agg_time_dimension.type_params.time_granularity.name})."
),
)
)
# Validates that the window_choice is either MIN or MAX
if non_additive_dimension.window_choice not in {
AggregationType.MIN,
AggregationType.MAX,
}:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a non_additive_dimension with an invalid 'window_choice' of '{non_additive_dimension.window_choice.value}'. "
f"Only choices supported are 'min' or 'max'."
),
)
)
# Validates that all window_groupings are identifiers
identifiers_in_entity = {identifier.name for identifier in entity.identifiers}
window_groupings = set(non_additive_dimension.window_groupings)
intersected_identifiers = window_groupings.intersection(identifiers_in_entity)
if len(intersected_identifiers) != len(window_groupings):
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=(
f"Measure '{measure.name}' has a non_additive_dimension with an invalid 'window_groupings'. "
f"These identifiers {window_groupings.difference(intersected_identifiers)} do not exist in the entity."
),
)
)
return issues
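# Illustrative sketch (not part of the rule above): the shape of a non_additive_dimension
# input that would satisfy the checks, written as a plain dict rather than the real
# configuration object. The dimension and identifier names are hypothetical.
def _example_non_additive_dimension() -> dict:
    return {
        "name": "balance_date",        # must exist as a TIME dimension on the entity
        "window_choice": "min",        # only MIN or MAX window choices pass validation
        "window_groupings": ["user"],  # each grouping must be an identifier on the entity
    }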
class CountAggregationExprRule(ModelValidationRule):
"""Checks that COUNT measures have an expr provided."""
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
for entity in model.entities:
for measure in entity.measures:
context = EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
)
if measure.agg == AggregationType.COUNT and measure.expr is None:
issues.append(
ValidationError(
context=context,
message=(
f"Measure '{measure.name}' uses a COUNT aggregation, which requires an expr to be provided. "
f"Provide 'expr: 1' if a count of all rows is desired."
),
)
)
if (
measure.agg == AggregationType.COUNT
and measure.expr
and measure.expr.lower().startswith("distinct ")
):
# TODO: Expand this to include SUM and potentially AVG agg types as well
# Note expansion of this guard requires the addition of sum_distinct and avg_distinct agg types
# or else an adjustment to the error message below.
issues.append(
ValidationError(
context=context,
message=(
f"Measure '{measure.name}' uses a '{measure.agg.value}' aggregation with a DISTINCT expr: "
f"'{measure.expr}. This is not supported, as it effectively converts an additive "
f"measure into a non-additive one, and this could cause certain queries to return "
f"incorrect results. Please use the {measure.agg.value}_distinct aggregation type."
),
)
)
return issues
def _get_measure_names_from_model(model: UserConfiguredModel) -> Set[str]:
"""Return every distinct measure name specified in the model"""
measure_names = set()
for entity in model.entities:
for measure in entity.measures:
measure_names.add(measure.reference.name)
return measure_names

View File

@@ -0,0 +1,143 @@
import traceback
import sys
from typing import List
from dbt.exceptions import DbtSemanticValidationError
from dbt.semantic.references import MetricModelReference
from dbt.contracts.graph.nodes import Metric
from dbt.contracts.graph.metrics import MetricType, MetricTimeWindow
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
from dbt.semantic.validations.validator_helpers import (
MetricContext,
ModelValidationRule,
ValidationIssueType,
ValidationError,
)
class CumulativeMetricRule(ModelValidationRule):
"""Checks that cumulative sum metrics are configured properly"""
@staticmethod
def _validate_cumulative_sum_metric_params(metric: Metric) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
if metric.type == MetricType.CUMULATIVE:
if metric.type_params.window and metric.type_params.grain_to_date:
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message="Both window and grain_to_date set for cumulative metric. Please set one or the other",
)
)
if metric.type_params.window:
try:
MetricTimeWindow.parse(metric.type_params.window.to_string())
except DbtSemanticValidationError as e:
if sys.version_info >= (3, 10):
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message="".join(
traceback.format_exception_only(type(e), e)
),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
elif sys.version_info < (3, 10):
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message="".join(
traceback.format_exception_only(etype=type(e), value=e)
),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
for metric in model.metrics or []:
issues += CumulativeMetricRule._validate_cumulative_sum_metric_params(metric=metric)
return issues
class DerivedMetricRule(ModelValidationRule):
"""Checks that derived metrics are configured properly"""
@staticmethod
def _validate_alias_collision(metric: Metric) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
if metric.type == MetricType.DERIVED:
used_names = {input_metric.name for input_metric in metric.input_metrics}
for input_metric in metric.input_metrics:
if input_metric.alias:
issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias)
if input_metric.alias in used_names:
issues.append(
ValidationError(
message=f"Alias '{input_metric.alias}' for input metric: '{input_metric.name}' is already being used. Please choose another alias.",
)
)
used_names.add(input_metric.alias)
return issues
@staticmethod
def _validate_input_metrics_exist(model: UserConfiguredModel) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
all_metrics = {m.name for m in model.metrics}
for metric in model.metrics:
if metric.type == MetricType.DERIVED:
for input_metric in metric.input_metrics:
if input_metric.name not in all_metrics:
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not exist as a configured metric in the model.",
)
)
return issues
@staticmethod
def _validate_time_offset_params(metric: Metric) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
for input_metric in metric.input_metrics or []:
if input_metric.offset_window and input_metric.offset_to_grain:
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message=f"Both offset_window and offset_to_grain set for derived metric '{metric.name}' on input metric '{input_metric.name}'. Please set one or the other.",
)
)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues: List[ValidationIssueType] = []
issues += DerivedMetricRule._validate_input_metrics_exist(model=model)
for metric in model.metrics or []:
issues += DerivedMetricRule._validate_alias_collision(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
return issues
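# Illustrative sketch (not part of the rule above): the existence check for derived
# metrics reduces to a set difference between referenced input metric names and the
# names configured in the model. Hypothetical metric names below.
def _example_missing_input_metrics() -> set:
    all_metrics = {"revenue", "orders"}
    input_metrics = {"revenue", "active_users"}
    return input_metrics - all_metrics  # {"active_users"} would be reported as an error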

View File

@@ -0,0 +1,102 @@
from concurrent.futures import ProcessPoolExecutor
import copy
from typing import List, Sequence
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.agg_time_dimensions import AggregationTimeDimensionRule
from dbt.semantic.validations.entities import (
EntityTimeDimensionWarningsRule,
EntityValidityWindowRule,
)
from dbt.semantic.validations.dimension_const import DimensionConsistencyRule
from dbt.semantic.validations.element_const import ElementConsistencyRule
from dbt.semantic.validations.identifiers import (
IdentifierConfigRule,
IdentifierConsistencyRule,
NaturalIdentifierConfigurationRule,
OnePrimaryIdentifierPerEntityRule,
)
from dbt.semantic.validations.measures import (
CountAggregationExprRule,
EntityMeasuresUniqueRule,
MeasureConstraintAliasesRule,
MetricMeasuresRule,
MeasuresNonAdditiveDimensionRule,
MeasureMetricProxyUniqueRule,
)
from dbt.semantic.validations.metrics import CumulativeMetricRule, DerivedMetricRule
from dbt.semantic.validations.reserved_keywords import ReservedKeywordsRule
from dbt.semantic.validations.unique_valid_name import UniqueAndValidNameRule
from dbt.semantic.validations.validator_helpers import (
ModelValidationResults,
ModelValidationRule,
ModelValidationException,
ModelBuildResult,
)
class ModelValidator:
"""A Validator that acts on UserConfiguredModel"""
DEFAULT_RULES = (
DerivedMetricRule(),
CountAggregationExprRule(),
EntityMeasuresUniqueRule(),
EntityTimeDimensionWarningsRule(),
EntityValidityWindowRule(),
DimensionConsistencyRule(),
ElementConsistencyRule(),
IdentifierConfigRule(),
IdentifierConsistencyRule(),
NaturalIdentifierConfigurationRule(),
OnePrimaryIdentifierPerEntityRule(),
MeasureConstraintAliasesRule(),
MetricMeasuresRule(),
CumulativeMetricRule(),
UniqueAndValidNameRule(),
AggregationTimeDimensionRule(),
ReservedKeywordsRule(),
MeasuresNonAdditiveDimensionRule(),
MeasureMetricProxyUniqueRule(),
)
def __init__(
self, rules: Sequence[ModelValidationRule] = DEFAULT_RULES, max_workers: int = 1
) -> None:
"""Constructor.
Args:
rules: List of validation rules to run. Defaults to DEFAULT_RULES
max_workers: sets the max number of rules to run against the model concurrently
"""
# Raises an error if 'rules' is an empty sequence or None
if not rules:
raise ValueError(
"ModelValidator 'rules' must be a sequence with at least one ModelValidationRule."
)
self._rules = rules
self._executor = ProcessPoolExecutor(max_workers=max_workers)
def validate_model(self, model: UserConfiguredModel) -> ModelBuildResult:
"""Validate a model according to configured rules."""
issues: List[ModelValidationResults] = []
for rule in self._rules:
issues.append(ModelValidationResults.from_issues_sequence(rule.validate_model(model)))
return ModelBuildResult(model=model, issues=ModelValidationResults.merge(issues))
def checked_validations(
self, model: UserConfiguredModel
) -> UserConfiguredModel: # chTODO: remember checked_build
"""Similar to validate(), but throws an exception if validation fails."""
model_copy = copy.deepcopy(model)
build_result = self.validate_model(model_copy)
if build_result.issues.has_blocking_issues:
raise ModelValidationException(issues=tuple(build_result.issues.all_issues))
return model
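# Illustrative usage sketch (assumes a parsed `model` is already available, e.g. from
# dbt's manifest loading; constructing a UserConfiguredModel by hand is out of scope here).
def _example_run_default_rules(model: UserConfiguredModel) -> None:
    validator = ModelValidator()  # DEFAULT_RULES; rules currently run one at a time
    build_result = validator.validate_model(model)
    print(build_result.issues.summary())
    for issue in build_result.issues.all_issues:
        print(issue.as_readable_str(verbose=True))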

View File

@@ -0,0 +1,53 @@
# TODO: Delete this rule as we don't need to enforce that these nodes are present
# from typing import List
# from dbt.contracts.graph.manifest import UserConfiguredModel
# from dbt.semantic.validations.validator_helpers import (
# ModelValidationRule,
# ValidationError,
# ValidationIssueType,
# )
# class NonEmptyRule(ModelValidationRule):
# """Check if the model contains data sources and metrics."""
# @staticmethod
# def _check_model_has_data_sources(model: UserConfiguredModel) -> List[ValidationIssueType]:
# issues: List[ValidationIssueType] = []
# if not model.entities:
# issues.append(
# ValidationError(
# message="No entities present in the model.",
# )
# )
# return issues
# @staticmethod
# def _check_model_has_metrics(model: UserConfiguredModel) -> List[ValidationIssueType]:
# issues: List[ValidationIssueType] = []
# # If we are going to generate measure proxy metrics that is sufficient as well
# create_measure_proxy_metrics = False
# for data_source in model.data_sources:
# for measure in data_source.measures:
# if measure.create_metric is True:
# create_measure_proxy_metrics = True
# break
# if not model.metrics and not create_measure_proxy_metrics:
# issues.append(
# ValidationError(
# message="No metrics present in the model.",
# )
# )
# return issues
# @staticmethod
# def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
# issues: List[ValidationIssueType] = []
# issues += NonEmptyRule._check_model_has_data_sources(model=model)
# issues += NonEmptyRule._check_model_has_metrics(model=model)
# return issues

View File

@@ -0,0 +1,139 @@
from typing import List
from dbt.semantic.references import EntityElementReference
from dbt.contracts.graph.nodes import Entity
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityElementContext,
EntityElementType,
ModelValidationRule,
ValidationError,
ValidationIssueType,
)
# A non-exhaustive tuple of reserved keywords
# This list was created by running an intersection of keywords for redshift,
# postgres, bigquery, and snowflake
RESERVED_KEYWORDS = (
"and",
"as",
"create",
"distinct",
"for",
"from",
"full",
"having",
"in",
"inner",
"into",
"is",
"join",
"left",
"like",
"natural",
"not",
"null",
"on",
"or",
"order",
"right",
"select",
"union",
"using",
"where",
"with",
)
class ReservedKeywordsRule(ModelValidationRule):
"""Check that any element that ends up being selected by name (instead of expr) isn't a commonly reserved keyword.
Note: This rule DOES NOT catch all keywords. That is because keywords are
engine specific, and semantic validations are not engine specific. I.e. if
you change your underlying data warehouse engine, semantic validations
should still pass, but your data warehouse validations might fail. However,
data warehouse validations are slow in comparison to semantic validation
rules. Thus this rule is intended to catch words that are reserved keywords
in all supported engines and to fail fast. E.g., `USER` is a reserved keyword
in Redshift but not in all other supported engines. Therefore if one is
using Redshift and sets a dimension name to `user`, the config would pass
this rule, but would then fail Data Warehouse Validations.
"""
@staticmethod
def _validate_entity_sub_elements(entity: Entity) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
if entity.dimensions:
for dimension in entity.dimensions:
if dimension.name.lower() in RESERVED_KEYWORDS:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=dimension.name
),
element_type=EntityElementType.DIMENSION,
),
message=f"'{dimension.name}' is an SQL reserved keyword, and thus cannot be used as a dimension 'name'.",
)
)
if entity.identifiers:
for identifier in entity.identifiers:
if identifier.is_composite:
msg = "'{name}' is an SQL reserved keyword, and thus cannot be used as a sub-identifier 'name'"
names = [
sub_ident.name
for sub_ident in identifier.identifiers
if sub_ident.name is not None
]
else:
msg = "'{name}' is an SQL reserved keyword, and thus cannot be used as an identifier 'name'"
names = [identifier.name]
for name in names:
if name.lower() in RESERVED_KEYWORDS:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=identifier.name
),
element_type=EntityElementType.IDENTIFIER,
),
message=msg.format(name=name),
)
)
if entity.measures:
for measure in entity.measures:
if measure.name.lower() in RESERVED_KEYWORDS:
issues.append(
ValidationError(
context=EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
message=f"'{measure.name}' is an SQL reserved keyword, and thus cannot be used as an measure 'name'.",
)
)
return issues
@classmethod
def _validate_entities(cls, model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Checks names of objects that are not nested."""
issues: List[ValidationIssueType] = []
for entity in model.entities:
issues += cls._validate_entity_sub_elements(entity=entity)
return issues
@classmethod
def validate_model(cls, model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
return cls._validate_entities(model=model)
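# Illustrative sketch (not part of the rule above): the keyword check is a simple
# case-insensitive membership test against RESERVED_KEYWORDS. The name is hypothetical.
def _example_is_reserved(name: str = "select") -> bool:
    return name.lower() in RESERVED_KEYWORDS  # True for "select", False for e.g. "revenue"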

View File

@@ -0,0 +1,213 @@
from __future__ import annotations
import enum
import re
from typing import Dict, Tuple, List
from dbt.semantic.references import (
EntityElementReference,
EntityReference,
MetricModelReference,
ElementReference,
)
from dbt.contracts.graph.nodes import Entity
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.validations.validator_helpers import (
EntityContext,
EntityElementContext,
EntityElementType,
MetricContext,
ModelValidationRule,
ValidationContext,
ValidationError,
ValidationIssueType,
)
from dbt.semantic.object_utils import assert_values_exhausted
from dbt.semantic.time import TimeGranularity
@enum.unique
class SemanticReservedKeywords(enum.Enum):
"""Enumeration of reserved keywords with helper for accessing the reason they are reserved"""
METRIC_TIME = "metric_time"
DBT_INTERNAL_UUID = "dbt_internal_uuid"
@staticmethod
def get_reserved_reason(keyword: SemanticReservedKeywords) -> str:
"""Get the reason a given keyword is reserved. Guarantees an exhaustive switch"""
if keyword is SemanticReservedKeywords.METRIC_TIME:
return (
"Used as the query input for creating time series metrics from measures with "
"different time dimension names."
)
elif keyword is SemanticReservedKeywords.DBT_INTERNAL_UUID:
return "Used internally to reference a column that has a uuid generated by dbt."
else:
assert_values_exhausted(keyword)
class UniqueAndValidNameRule(ModelValidationRule):
"""Check that names are unique and valid.
* Names of elements within an entity are unique / valid within that entity.
* Names of entities and metrics in the model are unique / valid.
"""
NAME_REGEX = re.compile(r"\A[a-z][a-z0-9_]*[a-z0-9]\Z")
@staticmethod
def check_valid_name(name: str) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
if not UniqueAndValidNameRule.NAME_REGEX.match(name):
issues.append(
ValidationError(
message=f"Invalid name `{name}` - names should only consist of lower case letters, numbers, "
f"and underscores. In addition, names should start with a lower case letter, and should not end "
f"with an underscore, and they must be at least 2 characters long.",
)
)
if name.upper() in TimeGranularity.list_names():
issues.append(
ValidationError(
message=f"Invalid name `{name}` - names cannot match reserved time granularity keywords "
f"({TimeGranularity.list_names()})",
)
)
if name.lower() in {reserved_name.value for reserved_name in SemanticReservedKeywords}:
reason = SemanticReservedKeywords.get_reserved_reason(
SemanticReservedKeywords(name.lower())
)
issues.append(
ValidationError(
message=f"Invalid name `{name}` - this name is reserved by MetricFlow. Reason: {reason}",
)
)
return issues
@staticmethod
def _validate_entity_elements(entity: Entity) -> List[ValidationIssueType]:
issues: List[ValidationIssueType] = []
element_info_tuples: List[Tuple[ElementReference, str, ValidationContext]] = []
if entity.measures:
for measure in entity.measures:
element_info_tuples.append(
(
measure.reference,
"measure",
EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=measure.name
),
element_type=EntityElementType.MEASURE,
),
)
)
if entity.identifiers:
for identifier in entity.identifiers:
element_info_tuples.append(
(
identifier.reference,
"identifier",
EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=identifier.name
),
element_type=EntityElementType.IDENTIFIER,
),
)
)
if entity.dimensions:
for dimension in entity.dimensions:
element_info_tuples.append(
(
dimension.reference,
"dimension",
EntityElementContext(
entity_element=EntityElementReference(
entity_name=entity.name, name=dimension.name
),
element_type=EntityElementType.DIMENSION,
),
)
)
name_to_type: Dict[ElementReference, str] = {}
for name, _type, context in element_info_tuples:
if name in name_to_type:
issues.append(
ValidationError(
message=f"In entity `{entity.name}`, can't use name `{name.name}` for a "
f"{_type} when it was already used for a {name_to_type[name]}",
)
)
else:
name_to_type[name] = _type
for name, _type, context in element_info_tuples:
issues += UniqueAndValidNameRule.check_valid_name(name=name.name)
return issues
@staticmethod
def _validate_top_level_objects(model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Checks names of objects that are not nested."""
object_info_tuples = []
if model.entities:
for entity in model.entities:
object_info_tuples.append(
(
entity.name,
"entity",
EntityContext(
entity=EntityReference(entity_name=entity.name),
),
)
)
name_to_type: Dict[str, str] = {}
issues: List[ValidationIssueType] = []
for name, type_, context in object_info_tuples:
if name in name_to_type:
issues.append(
ValidationError(
message=f"Can't use name `{name}` for a {type_} when it was already used for a "
f"{name_to_type[name]}",
)
)
else:
name_to_type[name] = type_
if model.metrics:
metric_names = set()
for metric in model.metrics:
if metric.name in metric_names:
issues.append(
ValidationError(
context=MetricContext(
metric=MetricModelReference(metric_name=metric.name),
),
message=f"Can't use name `{metric.name}` for a metric when it was already used for a metric",
)
)
else:
metric_names.add(metric.name)
for name, _type, context in object_info_tuples:
issues += UniqueAndValidNameRule.check_valid_name(name=name)
return issues
@staticmethod
def validate_model(model: UserConfiguredModel) -> List[ValidationIssueType]: # noqa: D
issues = []
issues += UniqueAndValidNameRule._validate_top_level_objects(model=model)
for entity in model.entities:
issues += UniqueAndValidNameRule._validate_entity_elements(entity=entity)
return issues
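# Illustrative sketch (not part of the rule above): which names pass check_valid_name.
# The sample names are hypothetical.
def _example_name_validity() -> None:
    assert UniqueAndValidNameRule.check_valid_name("order_total") == []   # valid
    assert UniqueAndValidNameRule.check_valid_name("OrderTotal") != []    # upper case rejected
    assert UniqueAndValidNameRule.check_valid_name("order_total_") != []  # trailing underscore rejected
    assert UniqueAndValidNameRule.check_valid_name("metric_time") != []   # reserved keyword rejected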

View File

@@ -0,0 +1,439 @@
from __future__ import annotations
from collections import defaultdict, deque
import click
import functools
import traceback
from abc import abstractmethod
from dataclasses import dataclass, field
from datetime import date
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
from dbt.dataclass_schema import dbtClassMixin
from itertools import chain
from dbt.semantic.references import (
EntityElementReference,
EntityReference,
MetricModelReference,
)
from dbt.contracts.graph.dimensions import DimensionType
from dbt.semantic.user_configured_model import UserConfiguredModel
from dbt.semantic.object_utils import assert_values_exhausted
VALIDATE_SAFELY_ERROR_STR_TMPLT = (
". Issue occurred in method `{method_name}` called with {arguments_str}"
)
ValidationContextJSON = Dict[str, Union[str, int, None]]
ValidationIssueJSON = Dict[str, Union[str, int, ValidationContextJSON]]
class ValidationIssueLevel(Enum):
"""Categorize the issues found while validating a MQL model."""
# Issue should be fixed, but model will still work in MQL
WARNING = 0
# Issue doesn't prevent model from working in MQL yet, but will eventually be an error
FUTURE_ERROR = 1
# Issue will prevent the model from working in MQL
ERROR = 2
@property
def name_plural(self) -> str:
"""Controlled pluralization of ValidationIssueLevel name value"""
return f"{self.name}S"
ISSUE_COLOR_MAP = {
ValidationIssueLevel.WARNING: "cyan",
ValidationIssueLevel.ERROR: "bright_red",
ValidationIssueLevel.FUTURE_ERROR: "bright_yellow",
}
class EntityElementType(Enum):
"""Maps entity element types to a readable string."""
MEASURE = "measure"
DIMENSION = "dimension"
IDENTIFIER = "identifier"
@dataclass
class MetricContext(dbtClassMixin):
"""The context class for validation issues involving metrics"""
metric: MetricModelReference
def context_str(self) -> str:
"""Human readable stringified representation of the context"""
return f"With metric `{self.metric.metric_name}`"
@dataclass
class EntityContext(dbtClassMixin):
"""The context class for validation issues involving entities"""
entity: EntityReference
def context_str(self) -> str:
"""Human readable stringified representation of the context"""
return f"With entity `{self.entity.entity_name}`"
@dataclass
class EntityElementContext(dbtClassMixin):
"""The context class for validation issues involving dimensions"""
entity_element: EntityElementReference
element_type: EntityElementType
def context_str(self) -> str:
"""Human readable stringified representation of the context"""
return f"With {self.element_type.value} `{self.entity_element.name}` in entity `{self.entity_element.entity_name}`"
ValidationContext = Union[
MetricContext,
EntityContext,
EntityElementContext,
]
@dataclass
class ValidationIssue(dbtClassMixin):
"""The abstract base ValidationIsssue class that the specific ValidationIssue classes are built from"""
message: str
context: Optional[ValidationContext] = None
extra_detail: Optional[str] = None
@property
@abstractmethod
def level(self) -> ValidationIssueLevel:
"""The level of of ValidationIssue"""
raise NotImplementedError
def as_readable_str(self, verbose: bool = False, prefix: Optional[str] = None) -> str:
"""Return a easily readable string that can be used to log the issue."""
prefix = prefix or self.level.name
# The following is two lines instead of one line because
# technically self.context.context_str() can return an empty str
context_str = self.context.context_str() if self.context else ""
context_str += " - " if context_str != "" else ""
issue_str = f"{prefix}: {context_str}{self.message}"
if verbose and self.extra_detail is not None:
issue_str += f"\n{self.extra_detail}"
return issue_str
def as_cli_formatted_str(self, verbose: bool = False) -> str:
"""Returns a color-coded readable string for rendering issues in the CLI"""
return self.as_readable_str(
verbose=verbose,
prefix=click.style(self.level.name, bold=True, fg=ISSUE_COLOR_MAP[self.level]),
)
@dataclass
class ValidationWarning(ValidationIssue, dbtClassMixin):
"""A warning that was found while validating the model."""
@property
def level(self) -> ValidationIssueLevel: # noqa: D
return ValidationIssueLevel.WARNING
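# Illustrative sketch (not part of the contract classes): how an issue renders as a
# readable string. The message is hypothetical; with no context set, only the level
# prefix and the message appear.
def _example_readable_warning() -> str:
    issue = ValidationWarning(message="possible duplicate input measure")
    return issue.as_readable_str()  # "WARNING: possible duplicate input measure"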
@dataclass
class ValidationFutureError(ValidationIssue, dbtClassMixin):
"""A future error that was found while validating the model."""
error_date: date = date(year=2030, month=1, day=1)
@property
def level(self) -> ValidationIssueLevel: # noqa: D
return ValidationIssueLevel.FUTURE_ERROR
def as_readable_str(self, verbose: bool = False, prefix: Optional[str] = None) -> str:
"""Return a easily readable string that can be used to log the issue."""
return (
f"{super().as_readable_str(verbose=verbose, prefix=prefix)}"
f"IMPORTANT: this error will break your model starting {self.error_date.strftime('%b %d, %Y')}. "
)
@dataclass
class ValidationError(ValidationIssue, dbtClassMixin):
"""An error that was found while validating the model."""
@property
def level(self) -> ValidationIssueLevel: # noqa: D
return ValidationIssueLevel.ERROR
ValidationIssueType = Union[ValidationWarning, ValidationFutureError, ValidationError]
@dataclass
class ModelValidationResults(dbtClassMixin):
"""Class for organizating the results of running validations"""
warnings: Tuple[ValidationWarning, ...] = tuple()
future_errors: Tuple[ValidationFutureError, ...] = tuple()
errors: Tuple[ValidationError, ...] = tuple()
@property
def has_blocking_issues(self) -> bool:
"""Does the ModelValidationResults have ERROR issues"""
return len(self.errors) != 0
@classmethod
def from_issues_sequence(cls, issues: Sequence[ValidationIssueType]) -> ModelValidationResults:
"""Constructs a ModelValidationResults class from a list of ValidationIssues"""
warnings: List[ValidationWarning] = []
future_errors: List[ValidationFutureError] = []
errors: List[ValidationError] = []
for issue in issues:
if issue.level is ValidationIssueLevel.WARNING:
warnings.append(issue)
elif issue.level is ValidationIssueLevel.FUTURE_ERROR:
future_errors.append(issue)
elif issue.level is ValidationIssueLevel.ERROR:
errors.append(issue)
else:
assert_values_exhausted(issue.level)
return cls(
warnings=tuple(warnings), future_errors=tuple(future_errors), errors=tuple(errors)
)
@classmethod
def merge(cls, results: Sequence[ModelValidationResults]) -> ModelValidationResults:
"""Creates a new ModelValidatorResults instance from multiple instances
This is useful when there are multiple validators that are run and the
combined results are desireable. For instance there is a ModelValidator
and a DataWarehouseModelValidator. These both return validation issues.
If it's desireable to combine the results, the following makes it easy.
"""
if not isinstance(results, list):
results = list(results)
# this nested comprehension syntax is a little disorienting
# basically [element for object in list_of_objects for element in object.list_property]
# translates to "for each element in an object's list for each object in a list of objects"
warnings = tuple(issue for result in results for issue in result.warnings)
future_errors = tuple(issue for result in results for issue in result.future_errors)
errors = tuple(issue for result in results for issue in result.errors)
return cls(
warnings=warnings,
future_errors=future_errors,
errors=errors,
)
@property
def all_issues(self) -> Tuple[ValidationIssueType, ...]:
"""For when a singular list of issues is needed"""
return self.errors + self.future_errors + self.warnings
def summary(self) -> str:
"""Returns a stylized summary string for issues"""
errors = click.style(
text=f"{ValidationIssueLevel.ERROR.name_plural}: {len(self.errors)}",
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.ERROR],
)
future_errors = click.style(
text=f"{ValidationIssueLevel.FUTURE_ERROR.name_plural}: {len(self.future_errors)}",
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.FUTURE_ERROR],
)
warnings = click.style(
text=f"{ValidationIssueLevel.WARNING.name_plural}: {len(self.warnings)}",
fg=ISSUE_COLOR_MAP[ValidationIssueLevel.WARNING],
)
return f"{errors}, {future_erros}, {warnings}"
def generate_exception_issue(
what_was_being_done: str,
e: Exception,
context: Optional[ValidationContext] = None,
extras: Optional[Dict[str, str]] = None,
) -> ValidationIssueType:
"""Generates a validation issue for exceptions"""
if "stacktrace" not in extras:
extras["stacktrace"] = "".join(traceback.format_tb(e.__traceback__))
return ValidationError(
context=context,
message=f"An error occured while {what_was_being_done} - {''.join(traceback.format_exception_only(etype=type(e), value=e))}",
extra_detail="\n".join([f"{key}: {value}" for key, value in extras.items()]),
)
def _func_args_to_string(*args: Any, **kwargs: Any) -> str: # type: ignore
return f"positional args: {args}, key word args: {kwargs}"
def validate_safely(whats_being_done: str) -> Callable:
"""Decorator to safely run validation checks"""
def decorator_check_element_safely(func: Callable) -> Callable: # noqa
@functools.wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> List[ValidationIssueType]: # type: ignore
"""Safely run a check on model elements"""
issues: List[ValidationIssueType]
try:
issues = func(*args, **kwargs)
except Exception as e:
arguments_str = _func_args_to_string(*args, **kwargs)
issues = [
generate_exception_issue(
what_was_being_done=whats_being_done,
e=e,
extras={"method_name": func.__name__, "passed_args": arguments_str},
)
]
return issues
return wrapper
return decorator_check_element_safely
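# Illustrative sketch (not part of the decorator): validate_safely converts an unexpected
# exception inside a check into a single ValidationError issue instead of letting it
# propagate. The failing check below is hypothetical.
@validate_safely(whats_being_done="running an illustrative check")
def _example_failing_check() -> List[ValidationIssueType]:
    raise RuntimeError("boom")

def _example_validate_safely() -> None:
    issues = _example_failing_check()
    assert len(issues) == 1
    assert issues[0].level is ValidationIssueLevel.ERROR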
@dataclass(frozen=True)
class DimensionInvariants:
"""Helper object to ensure consistent dimension attributes across entities.
All dimensions with a given name in all entities should have attributes matching these values.
"""
type: DimensionType
is_partition: bool
class ModelValidationRule(dbtClassMixin):
"""Encapsulates logic for checking the values of objects in a model."""
@classmethod
@abstractmethod
def validate_model(cls, model: UserConfiguredModel) -> List[ValidationIssueType]:
"""Check the given model and return a list of validation issues"""
pass
@classmethod
def validate_model_serialized_for_multiprocessing(cls, serialized_model: str):
"""Validate a model serialized via Pydantic's .json() method, and return a list of JSON serialized issues
This method exists because our validations are forked into parallel processes via
multiprocessing.ProcessPoolExecutor, and passing a model or validation results object can result in
idiosyncratic behavior and inscrutable errors due to interactions between pickling and pydantic objects.
"""
# TODO: Fix? This might be broken
return ModelValidationResults.from_issues_sequence(
cls.validate_model(UserConfiguredModel.parse_raw(serialized_model))
).to_dict()
class ModelValidationException(Exception):
"""Exception raised when validation of a model fails."""
def __init__(self, issues: Tuple[ValidationIssueType, ...]) -> None: # noqa: D
issues_str = "\n".join([x.as_readable_str(verbose=True) for x in issues])
super().__init__(f"Error validating model. Issues:\n{issues_str}")
class iter_bucket:
"""
NOTE: Copied over from more_itertools but we don't want the dependency.
Wrap *iterable* and return an object that buckets the iterable into
child iterables based on a *key* function.
"""
def __init__(self, iterable, key, validator=None):
self._it = iter(iterable)
self._key = key
self._cache = defaultdict(deque)
self._validator = validator or (lambda x: True)
def __contains__(self, value):
if not self._validator(value):
return False
try:
item = next(self[value])
except StopIteration:
return False
else:
self._cache[value].appendleft(item)
return True
def _get_values(self, value):
"""
Helper to yield items from the parent iterator that match *value*.
Items that don't match are stored in the local cache as they
are encountered.
"""
while True:
# If we've cached some items that match the target value, emit
# the first one and evict it from the cache.
if self._cache[value]:
yield self._cache[value].popleft()
# Otherwise we need to advance the parent iterator to search for
# a matching item, caching the rest.
else:
while True:
try:
item = next(self._it)
except StopIteration:
return
item_value = self._key(item)
if item_value == value:
yield item
break
elif self._validator(item_value):
self._cache[item_value].append(item)
def __iter__(self):
for item in self._it:
item_value = self._key(item)
if self._validator(item_value):
self._cache[item_value].append(item)
yield from self._cache.keys()
def __getitem__(self, value):
if not self._validator(value):
return iter(())
return self._get_values(value)
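# Illustrative sketch (not part of the helper): bucketing plain values by a key function,
# mirroring how the alias rule groups a metric's input measures by name.
def _example_iter_bucket() -> None:
    buckets = iter_bucket(["apple", "avocado", "banana"], key=lambda word: word[0])
    assert sorted(buckets) == ["a", "b"]
    assert list(buckets["a"]) == ["apple", "avocado"]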
def iter_flatten(list_of_lists):
"""
NOTE: Copied over from more_itertools but we don't want the dependency.
Return an iterator flattening one level of nesting in a list of lists.
>>> list(iter_flatten([[0, 1], [2, 3]]))
[0, 1, 2, 3]
more_itertools also provides :func:`collapse` for flattening multiple levels of nesting.
"""
return chain.from_iterable(list_of_lists)
@dataclass
class ModelBuildResult: # noqa: D
model: UserConfiguredModel
# Issues found in the model.
issues: ModelValidationResults = field(default_factory=ModelValidationResults)

View File

@@ -1,6 +1,6 @@
import json
from dbt.contracts.graph.nodes import Exposure, SourceDefinition, Metric
from dbt.contracts.graph.nodes import Exposure, SourceDefinition, Metric, Entity
from dbt.flags import get_flags
from dbt.graph import ResourceTypeSelector
from dbt.task.runnable import GraphRunnableTask
@@ -27,6 +27,7 @@ class ListTask(GraphRunnableTask):
NodeType.Source,
NodeType.Exposure,
NodeType.Metric,
NodeType.Entity,
)
)
ALL_RESOURCE_VALUES = DEFAULT_RESOURCE_VALUES | frozenset((NodeType.Analysis,))
@@ -73,6 +74,8 @@ class ListTask(GraphRunnableTask):
yield self.manifest.exposures[node]
elif node in self.manifest.metrics:
yield self.manifest.metrics[node]
elif node in self.manifest.entities:
yield self.manifest.entities[node]
else:
raise DbtRuntimeError(
f'Got an unexpected result from node selection: "{node}"'
@@ -96,6 +99,11 @@ class ListTask(GraphRunnableTask):
# metrics are searched for by pkg.metric_name
metric_selector = ".".join([node.package_name, node.name])
yield f"metric:{metric_selector}"
elif node.resource_type == NodeType.Entity:
assert isinstance(node, Entity)
# entities are searched for by pkg.entity_name
entity_selector = ".".join([node.package_name, node.name])
yield f"entity:{entity_selector}"
else:
# everything else is from `fqn`
yield ".".join(node.fqn)

21
core/dbt/task/validate.py Normal file
View File

@@ -0,0 +1,21 @@
# This task is intended to validate a semantic layer
from dbt.task.runnable import GraphRunnableTask
from dbt.events.types import DebugCmdOut
from dbt.events.functions import fire_event
class ValidateTask(GraphRunnableTask):
def __init__(self, args, config, manifest):
super().__init__(args, config, manifest)
def run(self):
fire_event(DebugCmdOut(msg="Starting validation."))
GraphRunnableTask.load_manifest(self)
fire_event(DebugCmdOut(msg="Callum still has more todos"))
fire_event(DebugCmdOut(msg="Validation completed!"))
fire_event(DebugCmdOut(msg="Done."))

View File

@@ -64,6 +64,7 @@ setup(
"typing-extensions>=3.7.4",
"werkzeug>=1,<3",
"pathspec>=0.9,<0.11",
"mo-sql-parsing==9.328.23003",
# the following are all to match snowflake-connector-python
"requests<3.0.0",
"idna>=2.5,<4",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -8,6 +8,7 @@
"docs",
"exposures",
"metrics",
"entities",
"selectors"
],
"properties": {
@@ -85,6 +86,13 @@
},
"description": "The metrics defined in the dbt project and its dependencies"
},
"entities": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/Entity"
},
"description": "The entities defined in the dbt project and its dependencies"
},
"selectors": {
"type": "object",
"description": "The selectors defined in selectors.yml"
@@ -173,7 +181,7 @@
}
},
"additionalProperties": false,
"description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, dbt.contracts.graph.nodes.SourceDefinition]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]])",
"description": "WritableManifest(metadata: dbt.contracts.graph.manifest.ManifestMetadata, nodes: Mapping[str, Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode]], sources: Mapping[str, dbt.contracts.graph.nodes.SourceDefinition], macros: Mapping[str, dbt.contracts.graph.nodes.Macro], docs: Mapping[str, dbt.contracts.graph.nodes.Documentation], exposures: Mapping[str, dbt.contracts.graph.nodes.Exposure], metrics: Mapping[str, dbt.contracts.graph.nodes.Metric], entities: Mapping[str, dbt.contracts.graph.nodes.Entity], selectors: Mapping[str, Any], disabled: Optional[Mapping[str, List[Union[dbt.contracts.graph.nodes.AnalysisNode, dbt.contracts.graph.nodes.SingularTestNode, dbt.contracts.graph.nodes.HookNode, dbt.contracts.graph.nodes.ModelNode, dbt.contracts.graph.nodes.RPCNode, dbt.contracts.graph.nodes.SqlNode, dbt.contracts.graph.nodes.GenericTestNode, dbt.contracts.graph.nodes.SnapshotNode, dbt.contracts.graph.nodes.SeedNode, dbt.contracts.graph.nodes.SourceDefinition]]]], parent_map: Optional[Dict[str, List[str]]], child_map: Optional[Dict[str, List[str]]])",
"definitions": {
"ManifestMetadata": {
"type": "object",
@@ -406,7 +414,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.353436
"default": 1670902215.970579
},
"config_call_dict": {
"type": "object",
@@ -460,6 +468,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -504,7 +522,7 @@
}
},
"additionalProperties": false,
"description": "AnalysisNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "AnalysisNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"FileHash": {
"type": "object",
@@ -953,7 +971,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.355371
"default": 1670902215.973521
},
"config_call_dict": {
"type": "object",
@@ -1007,6 +1025,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -1051,7 +1079,7 @@
}
},
"additionalProperties": false,
"description": "SingularTestNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "SingularTestNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"TestConfig": {
"type": "object",
@@ -1312,7 +1340,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.356482
"default": 1670902215.975156
},
"config_call_dict": {
"type": "object",
@@ -1366,6 +1394,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -1420,7 +1458,7 @@
}
},
"additionalProperties": false,
"description": "HookNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, index: Optional[int] = None)"
"description": "HookNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, index: Optional[int] = None)"
},
"ModelNode": {
"type": "object",
@@ -1569,7 +1607,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.357701
"default": 1670902215.976732
},
"config_call_dict": {
"type": "object",
@@ -1623,6 +1661,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -1667,7 +1715,7 @@
}
},
"additionalProperties": false,
"description": "ModelNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "ModelNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"RPCNode": {
"type": "object",
@@ -1816,7 +1864,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.358761
"default": 1670902215.978195
},
"config_call_dict": {
"type": "object",
@@ -1870,6 +1918,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -1914,7 +1972,7 @@
}
},
"additionalProperties": false,
"description": "RPCNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "RPCNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"SqlNode": {
"type": "object",
@@ -2063,7 +2121,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.359803
"default": 1670902215.979718
},
"config_call_dict": {
"type": "object",
@@ -2117,6 +2175,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -2161,7 +2229,7 @@
}
},
"additionalProperties": false,
"description": "SqlNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "SqlNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.NodeConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"GenericTestNode": {
"type": "object",
@@ -2306,7 +2374,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.361009
"default": 1670902215.981434
},
"config_call_dict": {
"type": "object",
@@ -2360,6 +2428,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -2424,7 +2502,7 @@
}
},
"additionalProperties": false,
"description": "GenericTestNode(test_metadata: dbt.contracts.graph.nodes.TestMetadata, database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, column_name: Optional[str] = None, file_key_name: Optional[str] = None)"
"description": "GenericTestNode(test_metadata: dbt.contracts.graph.nodes.TestMetadata, database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.TestConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None, column_name: Optional[str] = None, file_key_name: Optional[str] = None)"
},
"TestMetadata": {
"type": "object",
@@ -2577,7 +2655,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.364386
"default": 1670902215.984685
},
"config_call_dict": {
"type": "object",
@@ -2631,6 +2709,16 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
@@ -2675,7 +2763,7 @@
}
},
"additionalProperties": false,
"description": "SnapshotNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SnapshotConfig, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
"description": "SnapshotNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SnapshotConfig, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', language: str = 'sql', refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, compiled_path: Optional[str] = None, compiled: bool = False, compiled_code: Optional[str] = None, extra_ctes_injected: bool = False, extra_ctes: List[dbt.contracts.graph.nodes.InjectedCTE] = <factory>, _pre_injected_sql: Optional[str] = None)"
},
"SnapshotConfig": {
"type": "object",
@@ -3030,7 +3118,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.366245
"default": 1670902215.987447
},
"config_call_dict": {
"type": "object",
@@ -3068,7 +3156,7 @@
}
},
"additionalProperties": false,
"description": "SeedNode(database: Optional[str], schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SeedConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', root_path: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.MacroDependsOn = <factory>)"
"description": "SeedNode(database: str, schema: str, name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], alias: str, checksum: dbt.contracts.files.FileHash, config: dbt.contracts.graph.model_config.SeedConfig = <factory>, _event_status: Dict[str, Any] = <factory>, tags: List[str] = <factory>, description: str = '', columns: Dict[str, dbt.contracts.graph.nodes.ColumnInfo] = <factory>, meta: Dict[str, Any] = <factory>, docs: dbt.contracts.graph.unparsed.Docs = <factory>, patch_path: Optional[str] = None, build_path: Optional[str] = None, deferred: bool = False, unrendered_config: Dict[str, Any] = <factory>, created_at: float = <factory>, config_call_dict: Dict[str, Any] = <factory>, relation_name: Optional[str] = None, raw_code: str = '', root_path: Optional[str] = None)"
},
"SeedConfig": {
"type": "object",
@@ -3416,7 +3504,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.368067
"default": 1670902215.989922
}
},
"additionalProperties": false,
@@ -3526,12 +3614,12 @@
},
"dbt_version": {
"type": "string",
"default": "1.4.1"
"default": "1.4.0a1"
},
"generated_at": {
"type": "string",
"format": "date-time",
"default": "2023-02-09T10:04:47.347023Z"
"default": "2022-12-13T03:30:15.961825Z"
},
"invocation_id": {
"oneOf": [
@@ -3542,7 +3630,7 @@
"type": "null"
}
],
"default": "f795bc66-f417-4007-af6e-f2e513d33790"
"default": "4f2b967b-7e02-46de-a7ea-268a05e3fab1"
},
"env": {
"type": "object",
@@ -3553,7 +3641,7 @@
}
},
"additionalProperties": false,
"description": "FreshnessMetadata(dbt_schema_version: str = <factory>, dbt_version: str = '1.4.1', generated_at: datetime.datetime = <factory>, invocation_id: Optional[str] = <factory>, env: Dict[str, str] = <factory>)"
"description": "FreshnessMetadata(dbt_schema_version: str = <factory>, dbt_version: str = '1.4.0a1', generated_at: datetime.datetime = <factory>, invocation_id: Optional[str] = <factory>, env: Dict[str, str] = <factory>)"
},
"SourceFreshnessRuntimeError": {
"type": "object",
@@ -3895,7 +3983,7 @@
},
"created_at": {
"type": "number",
"default": 1675937087.368656
"default": 1670902215.990816
},
"supported_languages": {
"oneOf": [
@@ -4136,13 +4224,23 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"created_at": {
"type": "number",
"default": 1675937087.369866
"default": 1670902215.993354
}
},
"additionalProperties": false,
"description": "Exposure(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], type: dbt.contracts.graph.unparsed.ExposureType, owner: dbt.contracts.graph.unparsed.ExposureOwner, description: str = '', label: Optional[str] = None, maturity: Optional[dbt.contracts.graph.unparsed.MaturityType] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.ExposureConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, url: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
"description": "Exposure(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], type: dbt.contracts.graph.unparsed.ExposureType, owner: dbt.contracts.graph.unparsed.ExposureOwner, description: str = '', label: Optional[str] = None, maturity: Optional[dbt.contracts.graph.unparsed.MaturityType] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.ExposureConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, url: Optional[str] = None, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, sources: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, entities: List[List[str]] = <factory>, created_at: float = <factory>)"
},
"ExposureOwner": {
"type": "object",
@@ -4353,13 +4451,23 @@
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"created_at": {
"type": "number",
"default": 1675937087.371092
"default": 1670902215.995033
}
},
"additionalProperties": false,
"description": "Metric(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], description: str, label: str, calculation_method: str, expression: str, filters: List[dbt.contracts.graph.unparsed.MetricFilter], time_grains: List[str], dimensions: List[str], timestamp: Optional[str] = None, window: Optional[dbt.contracts.graph.unparsed.MetricTime] = None, model: Optional[str] = None, model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.MetricConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
"description": "Metric(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], description: str, label: str, calculation_method: str, timestamp: str, expression: str, filters: List[dbt.contracts.graph.unparsed.MetricFilter], time_grains: List[str], dimensions: List[str], window: Optional[dbt.contracts.graph.unparsed.MetricTime] = None, model: Optional[str] = None, model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.MetricConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, metrics: List[List[str]] = <factory>, created_at: float = <factory>)"
},
"MetricFilter": {
"type": "object",
@@ -4427,6 +4535,148 @@
},
"additionalProperties": true,
"description": "MetricConfig(_extra: Dict[str, Any] = <factory>, enabled: bool = True)"
},
"Entity": {
"type": "object",
"required": [
"name",
"resource_type",
"package_name",
"path",
"original_file_path",
"unique_id",
"fqn",
"model",
"description",
"dimensions"
],
"properties": {
"name": {
"type": "string"
},
"resource_type": {
"type": "string",
"enum": [
"entity"
]
},
"package_name": {
"type": "string"
},
"path": {
"type": "string"
},
"original_file_path": {
"type": "string"
},
"unique_id": {
"type": "string"
},
"fqn": {
"type": "array",
"items": {
"type": "string"
}
},
"model": {
"type": "string"
},
"description": {
"type": "string"
},
"dimensions": {
"type": "array",
"items": {
"type": "string"
}
},
"model_unique_id": {
"oneOf": [
{
"type": "string"
},
{
"type": "null"
}
]
},
"meta": {
"type": "object",
"default": {}
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"default": []
},
"config": {
"$ref": "#/definitions/EntityConfig",
"default": {
"enabled": true
}
},
"unrendered_config": {
"type": "object",
"default": {}
},
"sources": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"depends_on": {
"$ref": "#/definitions/DependsOn",
"default": {
"macros": [],
"nodes": []
}
},
"refs": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"entities": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string"
}
},
"default": []
},
"created_at": {
"type": "number",
"default": 1674510977.805523
}
},
"additionalProperties": false,
"description": "Entity(name: str, resource_type: dbt.node_types.NodeType, package_name: str, path: str, original_file_path: str, unique_id: str, fqn: List[str], model: str, description: str, dimensions: List[str], model_unique_id: Optional[str] = None, meta: Dict[str, Any] = <factory>, tags: List[str] = <factory>, config: dbt.contracts.graph.model_config.EntityConfig = <factory>, unrendered_config: Dict[str, Any] = <factory>, sources: List[List[str]] = <factory>, depends_on: dbt.contracts.graph.nodes.DependsOn = <factory>, refs: List[List[str]] = <factory>, entities: List[List[str]] = <factory>, created_at: float = <factory>)"
},
"EntityConfig": {
"type": "object",
"required": [],
"properties": {
"enabled": {
"type": "boolean",
"default": true
}
},
"additionalProperties": true,
"description": "EntityConfig(_extra: Dict[str, Any] = <factory>, enabled: bool = True)"
}
},
"$schema": "http://json-schema.org/draft-07/schema#",

View File

@@ -35,6 +35,7 @@ def basic_uncompiled_model():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
deferred=False,
description='',
@@ -67,6 +68,7 @@ def basic_compiled_model():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
deferred=True,
description='',
@@ -124,6 +126,7 @@ def basic_uncompiled_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities':[],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'deferred': False,
@@ -173,6 +176,7 @@ def basic_compiled_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities':[],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'deferred': True,
@@ -363,6 +367,7 @@ def basic_uncompiled_schema_test_node():
refs=[],
sources=[],
metrics=[],
entities=[],
deferred=False,
depends_on=DependsOn(),
description='',
@@ -396,6 +401,7 @@ def basic_compiled_schema_test_node():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
deferred=False,
description='',
@@ -435,6 +441,7 @@ def basic_uncompiled_schema_test_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities':[],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'description': '',
@@ -485,6 +492,7 @@ def basic_compiled_schema_test_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'deferred': False,
'database': 'test_db',

View File

@@ -10,6 +10,8 @@ from dbt.contracts.graph.model_config import (
SnapshotConfig,
SourceConfig,
ExposureConfig,
MetricConfig,
EntityConfig,
EmptySnapshotConfig,
Hook,
)
@@ -24,6 +26,7 @@ from dbt.contracts.graph.nodes import (
Macro,
Exposure,
Metric,
Entity,
SeedNode,
Docs,
MacroDependsOn,
@@ -142,6 +145,7 @@ def base_parsed_model_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'description': '',
@@ -191,6 +195,7 @@ def basic_parsed_model_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -241,6 +246,7 @@ def complex_parsed_model_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
'database': 'test_db',
'deferred': True,
@@ -301,6 +307,7 @@ def complex_parsed_model_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.test.bar']),
deferred=True,
description='My parsed node',
@@ -734,6 +741,7 @@ def patched_model_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='The foo model',
database='test_db',
@@ -794,6 +802,7 @@ def base_parsed_hook_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'deferred': False,
@@ -842,6 +851,7 @@ def base_parsed_hook_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
deferred=False,
@@ -872,6 +882,7 @@ def complex_parsed_hook_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
'deferred': False,
'database': 'test_db',
@@ -931,6 +942,7 @@ def complex_parsed_hook_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.test.bar']),
description='My parsed node',
deferred=False,
@@ -1025,6 +1037,7 @@ def basic_parsed_schema_test_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'deferred': False,
'database': 'test_db',
@@ -1072,6 +1085,7 @@ def basic_parsed_schema_test_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -1101,6 +1115,7 @@ def complex_parsed_schema_test_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': ['model.test.bar']},
'database': 'test_db',
'deferred': False,
@@ -1165,6 +1180,7 @@ def complex_parsed_schema_test_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.test.bar']),
description='My parsed node',
database='test_db',
@@ -1463,6 +1479,7 @@ def basic_timestamp_snapshot_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'deferred': False,
'database': 'test_db',
@@ -1522,6 +1539,7 @@ def basic_timestamp_snapshot_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -1570,6 +1588,7 @@ def basic_intermediate_timestamp_snapshot_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -1605,6 +1624,7 @@ def basic_check_snapshot_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'depends_on': {'macros': [], 'nodes': []},
'database': 'test_db',
'deferred': False,
@@ -1664,6 +1684,7 @@ def basic_check_snapshot_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -1712,6 +1733,7 @@ def basic_intermediate_check_snapshot_object():
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
description='',
database='test_db',
@@ -2152,6 +2174,7 @@ def basic_parsed_exposure_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'fqn': ['test', 'exposures', 'my_exposure'],
'unique_id': 'exposure.test.my_exposure',
'package_name': 'test',
@@ -2214,6 +2237,7 @@ def complex_parsed_exposure_dict():
'refs': [],
'sources': [],
'metrics': [],
'entities': [],
'fqn': ['test', 'exposures', 'my_exposure'],
'unique_id': 'exposure.test.my_exposure',
'package_name': 'test',
@@ -2288,47 +2312,24 @@ def test_compare_changed_exposure(func, basic_parsed_exposure_object):
# METRICS
@pytest.fixture
def minimal_parsed_metric_dict():
return {
'name': 'my_metric',
'type': 'count',
'timestamp': 'created_at',
'time_grains': ['day'],
'fqn': ['test', 'metrics', 'my_metric'],
'unique_id': 'metric.test.my_metric',
'package_name': 'test',
'meta': {},
'tags': [],
'path': 'models/something.yml',
'original_file_path': 'models/something.yml',
'description': '',
'created_at': 1.0,
}
@pytest.fixture
def basic_parsed_metric_dict():
return {
'name': 'new_customers',
'label': 'New Customers',
'model': 'ref("dim_customers")',
'model': "ref('dim_customers')",
'calculation_method': 'count',
'expression': 'user_id',
'timestamp': 'signup_date',
'time_grains': ['day', 'week', 'month'],
'dimensions': ['plan', 'country'],
'filters': [
{
"field": "is_paying",
"value": "true",
"operator": "=",
}
],
'filters': [],
'resource_type': 'metric',
'refs': [['dim_customers']],
'sources': [],
'metrics': [],
'entities': [],
'fqn': ['test', 'metrics', 'my_metric'],
'unique_id': 'metric.test.my_metric',
'package_name': 'test',
@@ -2337,26 +2338,98 @@ def basic_parsed_metric_dict():
'description': '',
'meta': {},
'tags': [],
'created_at': 1,
'depends_on': {
'nodes': [],
'macros': [],
},
'config': {
'enabled': True,
},
'unrendered_config': {},
}
@pytest.fixture
def basic_parsed_metric_object():
return Metric(
name='new_customers',
resource_type=NodeType.Metric,
model="ref('dim_customers')",
label='New Customers',
calculation_method='count',
expression="user_id",
timestamp='signup_date',
time_grains=['day','week','month'],
dimensions=['plan','country'],
filters=[],
refs=[['dim_customers']],
fqn=['test', 'metrics', 'my_metric'],
unique_id='metric.test.my_metric',
package_name='test',
path='models/something.yml',
original_file_path='models/something.yml',
description='',
meta={},
tags=[],
config=MetricConfig(),
unrendered_config={},
)
def test_simple_parsed_metric(basic_parsed_metric_dict, basic_parsed_metric_object):
assert_symmetric(basic_parsed_metric_object, basic_parsed_metric_dict, Metric)
# ENTITIES
@pytest.fixture
def basic_parsed_entity_dict():
return {
'name': 'my_entity',
'model': "ref('my_model')",
'dimensions': [],
'resource_type': 'entity',
'refs': [['my_model']],
'sources': [],
'metrics': [],
'entities': [],
'fqn': ['test', 'entities', 'my_entity'],
'unique_id': 'entity.test.my_entity',
'package_name': 'test',
'path': 'models/something.yml',
'original_file_path': 'models/something.yml',
'description': '',
'meta': {},
'tags': [],
'created_at': 1.0,
'depends_on': {
'nodes': [],
'macros': [],
},
'config': {
'enabled': True,
},
'unrendered_config': {},
}
@pytest.fixture
def basic_parsed_metric_object():
return Metric(
name='my_metric',
resource_type=NodeType.Metric,
calculation_method='count',
fqn=['test', 'metrics', 'my_metric'],
unique_id='metric.test.my_metric',
def basic_parsed_entity_object():
return Entity(
name='my_entity',
model="ref('my_model')",
dimensions=[],
resource_type=NodeType.Entity,
fqn=['test', 'entities', 'my_entity'],
refs=[['my_model']],
unique_id='entity.test.my_entity',
package_name='test',
path='models/something.yml',
original_file_path='models/something.yml',
description='',
meta={},
tags=[]
tags=[],
config=EntityConfig(),
unrendered_config={},
)
def test_simple_parsed_entity(basic_parsed_entity_dict, basic_parsed_entity_object):
assert_symmetric(basic_parsed_entity_object, basic_parsed_entity_dict, Entity)
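assert_symmetric comes from the shared contract test helpers; roughly, it checks that the object serializes to the fixture dict and deserializes back to an equal object. A hedged sketch of that round trip, assuming the to_dict/from_dict/validate interface used elsewhere in this diff:

# Rough sketch of the symmetry check for the new Entity node (not the helper's
# actual implementation); the to_dict/validate usage mirrors the manifest test below.
def roundtrip_entity(entity, expected_dict):
    dct = entity.to_dict(omit_none=True)
    Entity.validate(dct)                       # dict must satisfy the Entity contract
    assert dct == expected_dict                # serialization matches the fixture
    assert Entity.from_dict(dct) == entity     # and deserializes back to an equal object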

View File

@@ -786,3 +786,49 @@ class TestUnparsedMetric(ContractTestCase):
tst = self.get_ok_dict()
tst['tags'] = [123]
self.assert_fails_validation(tst)
class TestUnparsedEntity(ContractTestCase):
ContractType = UnparsedEntity
def get_ok_dict(self):
return {
'name': 'my_entity',
'model': "ref('my_model')",
'description': 'my model',
'dimensions': ['plan', 'country'],
'config': {},
'tags': [],
'meta': {},
}
def test_ok(self):
metric = self.ContractType(
name='my_entity',
model="ref('my_model')",
description="my model",
dimensions=['plan', 'country'],
config={},
tags=[],
meta={},
)
dct = self.get_ok_dict()
self.assert_symmetric(metric, dct)
pickle.loads(pickle.dumps(metric))
def test_bad_entity_no_name(self):
tst = self.get_ok_dict()
del tst['name']
self.assert_fails_validation(tst)
def test_bad_entity_no_model(self):
tst = self.get_ok_dict()
del tst['model']
self.assert_fails_validation(tst)
def test_bad_tags(self):
tst = self.get_ok_dict()
tst['tags'] = [123]
self.assert_fails_validation(tst)

View File

@@ -127,6 +127,7 @@ class ManifestTest(unittest.TestCase):
refs=[['multi']],
sources=[],
metrics=[],
entities=[],
fqn=['root', 'my_metric'],
unique_id='metric.root.my_metric',
package_name='root',
@@ -160,6 +161,7 @@ class ManifestTest(unittest.TestCase):
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
config=self.model_config,
tags=[],
@@ -182,6 +184,7 @@ class ManifestTest(unittest.TestCase):
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
config=self.model_config,
tags=[],
@@ -204,6 +207,7 @@ class ManifestTest(unittest.TestCase):
refs=[['events']],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.root.events']),
config=self.model_config,
tags=[],
@@ -226,6 +230,7 @@ class ManifestTest(unittest.TestCase):
refs=[['events']],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.root.dep']),
config=self.model_config,
tags=[],
@@ -248,6 +253,7 @@ class ManifestTest(unittest.TestCase):
refs=[['events']],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.root.events']),
config=self.model_config,
tags=[],
@@ -270,6 +276,7 @@ class ManifestTest(unittest.TestCase):
refs=[['events']],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(nodes=['model.root.nested', 'model.root.sibling']),
config=self.model_config,
tags=[],
@@ -304,6 +311,8 @@ class ManifestTest(unittest.TestCase):
exposure.validate(exposure.to_dict(omit_none=True))
for metric in self.metrics.values():
metric.validate(metric.to_dict(omit_none=True))
for entity in self.entities.values():
entity.validate(entity.to_dict(omit_none=True))
for node in self.nested_nodes.values():
node.validate(node.to_dict(omit_none=True))
for source in self.sources.values():
@@ -319,7 +328,7 @@ class ManifestTest(unittest.TestCase):
def test__no_nodes(self):
manifest = Manifest(
nodes={}, sources={}, macros={}, docs={}, disabled={}, files={},
exposures={}, metrics={}, selectors={},
exposures={}, metrics={}, selectors={}, entities={},
metadata=ManifestMetadata(generated_at=datetime.utcnow()),
)
@@ -354,7 +363,7 @@ class ManifestTest(unittest.TestCase):
nodes = copy.copy(self.nested_nodes)
manifest = Manifest(
nodes=nodes, sources={}, macros={}, docs={}, disabled={}, files={},
exposures={}, metrics={}, selectors={},
exposures={}, metrics={}, entities={}, selectors={},
metadata=ManifestMetadata(generated_at=datetime.utcnow()),
)
serialized = manifest.writable_manifest().to_dict(omit_none=True)
@@ -431,12 +440,14 @@ class ManifestTest(unittest.TestCase):
flat_exposures = flat_graph['exposures']
flat_groups = flat_graph['groups']
flat_metrics = flat_graph['metrics']
flat_entities = flat_graph['entities']
flat_nodes = flat_graph['nodes']
flat_sources = flat_graph['sources']
self.assertEqual(set(flat_graph), set(['exposures', 'groups', 'nodes', 'sources', 'metrics', 'entities']))
self.assertEqual(set(flat_exposures), set(self.exposures))
self.assertEqual(set(flat_groups), set(self.groups))
self.assertEqual(set(flat_metrics), set(self.metrics))
self.assertEqual(set(flat_entities), set(self.entities))
self.assertEqual(set(flat_nodes), set(self.nested_nodes))
self.assertEqual(set(flat_sources), set(self.sources))
for node in flat_nodes.values():
@@ -532,11 +543,14 @@ class ManifestTest(unittest.TestCase):
)
manifest = Manifest(nodes=nodes, sources=self.sources, macros={}, docs={},
disabled={}, files={}, exposures=self.exposures,
metrics=self.metrics, selectors={})
metrics=self.metrics, entities=self.entities, selectors={})
expect = {
'metrics': frozenset([
('root', 'my_metric')
]),
'entities': frozenset([
('root', 'my_entity')
]),
'exposures': frozenset([
('root', 'my_exposure')
]),
@@ -571,6 +585,7 @@ class ManifestTest(unittest.TestCase):
refs=[],
sources=[],
metrics=[],
entities=[],
depends_on=DependsOn(),
config=self.model_config,
tags=[],
@@ -895,6 +910,7 @@ class TestManifestSearch(unittest.TestCase):
files={},
exposures={},
metrics={},
entities={},
selectors={},
)
@@ -917,6 +933,7 @@ def make_manifest(nodes=[], sources=[], macros=[], docs=[]):
files={},
exposures={},
metrics={},
entities={},
selectors={},
)

View File

@@ -15,6 +15,7 @@ node_type_pluralizations = {
NodeType.Macro: "macros",
NodeType.Exposure: "exposures",
NodeType.Metric: "metrics",
NodeType.Entity: "entities",
NodeType.Group: "groups",
}
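With the new entry in place, the pluralization lookup for the entity node type resolves to the key used for the manifest's top-level collection; a one-line sanity check (assuming NodeType.Entity is added alongside this change):

# The added mapping gives entities the same treatment as metrics and exposures.
assert node_type_pluralizations[NodeType.Entity] == "entities"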

testing-project/postgres/.gitignore
View File

@@ -0,0 +1,4 @@
target/
dbt_packages/
logs/

View File

@@ -0,0 +1,32 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'postgres'
version: '1.0.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: 'user'
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.

View File

@@ -0,0 +1,21 @@
with orders as (
select * from {{ ref('fact_orders') }}
)
,
customers as (
select * from {{ ref('dim_customers') }}
)
,
final as (
select *
from orders
left join customers using (customer_id)
)
select * from final

View File

@@ -0,0 +1 @@
select * from {{ref('dim_customers_source')}}

View File

@@ -0,0 +1,21 @@
version: 2
models:
- name: dim_customers
columns:
- name: customer_id
description: TBD
- name: first_name
description: TBD
- name: last_name
description: TBD
- name: email
description: TBD
- name: gender
description: TBD
- name: is_new_customer
description: TBD

View File

@@ -0,0 +1,23 @@
version: 2
entities:
- name: customers
model: ref('dim_customers')
description: "Our customers entity"
identifiers:
- name: customer
type: primary
expr: customer_id
dimensions:
- name: first_name
type: categorical
# description: string
# column_name: first_name
# date_type: string
# default_timestamp: true
# primary_key: true
# time_grains: [day, week, month]

View File

@@ -0,0 +1,20 @@
version: 2
exposures:
- name: weekly_metrics
label: Some Label
type: dashboard
maturity: high
url: https://bi.tool/dashboards/1
description: >
Did someone say "exponential growth"?
depends_on:
- ref('fact_orders')
- ref('dim_customers')
# - metric('revenue')
- entity('orders')
owner:
name: Callum McData
email: data@jaffleshop.com

View File

@@ -0,0 +1,110 @@
version: 2
entities:
- name: orders
model: ref('fact_orders')
description: "Our orders entity"
identifiers:
- name: order_id
type: primary
- name: customer
type: foreign
expr: customer_id
dimensions:
- name: order_date
type: time
type_params:
time_granularity: day
- name: order_date_one
type: time
expr: order_date
type_params:
is_primary: true
time_granularity: day
- name: order_location
type: categorical
expr: order_country
measures:
- name: order_total
description: "The total value of the order"
agg: sum
- name: sales
description: "The total sale of the order"
agg: sum
expr: order_total
create_metric: True
- name: median_sales
description: "The median sale of the order"
agg: median
expr: order_total
create_metric: True
- name: testing_count
description: "Testing count functionality"
agg: count
expr: order_id
create_metric: True
metrics:
- name: revenue
entity: entity('orders')
description: "some description"
type: measure_proxy
type_params:
measure: order_total
- name: testing_metric_constraint
entity: entity('orders')
description: "some description"
type: measure_proxy
type_params:
measure: order_total
constraint: |
order_location = 'Unovo'
- name: sales_minus_revenue
type: derived
type_params:
expr: sales - revenue
metrics:
- sales
- revenue
- name: constraint_derived
type: derived
type_params:
expr: sales - revenue
metrics:
- sales
- name: revenue
constraint: |
order_location = 'Unovo'
- name: cancellation_rate
type: ratio
entity: entity('orders')
type_params:
numerator: order_total
denominator: median_sales
- name: wau_rolling_7
type: cumulative
entity: entity('orders')
type_params:
measure: order_total
window: 7 days
- name: revenue_usd
type: expr
entity: entity('orders')
type_params:
expr: order_total - sales
measures:
- order_total
- sales

View File

@@ -0,0 +1,9 @@
select
order_id
,order_country
,order_total
,had_discount
,customer_id
,to_date(order_date,'MM/DD/YYYY') as order_date
,round(order_total - (order_total/2)) as discount_total
from {{ref('fact_orders_source')}}

View File

@@ -0,0 +1,21 @@
version: 2
models:
- name: fact_orders
columns:
- name: order_id
description: TBD
- name: order_country
description: TBD
- name: order_total
description: TBD
- name: had_discount
description: TBD
- name: customer_id
description: TBD
- name: order_date
description: TBD

View File

@@ -0,0 +1,7 @@
select 1 as test
{# {% set mf = modules.mf %} #}
{# {% set explain_plan = mf.explain(metrics=["sales"]) %} #}
{# {% set explain_plan = mf.explain(self,metrics=["sales"]) %} #}
{# select "{{manifest}}" as explain_plan #}

View File

@@ -0,0 +1,6 @@
customer_id,first_name,last_name,email,gender,is_new_customer,date_added
1,Geodude,Pokemon,rocks@pokemon.org,Male,FALSE,2022-01-01
2,Mew,Pokemon,mew.is.the.best@pokemon.com,Genderfluid,TRUE,2022-01-06
3,Mewtwo,Pokemon,no.mewtwo.is.better@pokemon.com,Genderqueer,FALSE,2022-01-13
4,Charizard,Pokemon,firebreathbaby@pokemon.com,Female,TRUE,2022-02-01
5,Snorlax,Pokemon,sleep@pokemon.com,Male,TRUE,2022-02-03

View File

@@ -0,0 +1,11 @@
order_id,order_country,order_total,had_discount,customer_id,order_date
1,Unovo,2,false,1,01/28/2022
2,Kalos,1,false,2,01/20/2022
3,Kalos,1,false,1,01/13/2022
4,Alola,1,true,3,01/06/2022
5,Alola,1,false,4,01/08/2022
6,Kanto,1,false,5,01/21/2022
7,Alola,1,true,2,01/22/2022
8,Kanto,0,true,1,02/15/2022
9,Unovo,1,false,2,02/03/2022
10,Kanto,1,false,3,02/13/2022

View File

@@ -265,6 +265,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"unique_id": "model.test.model",
"fqn": ["test", "model"],
"metrics": [],
"entities": [],
"tags": [],
"meta": {},
"config": model_config,
@@ -359,6 +360,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"unique_id": "model.test.second_model",
"fqn": ["test", "second_model"],
"metrics": [],
"entities": [],
"tags": [],
"meta": {},
"config": second_config,
@@ -535,6 +537,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"file_key_name": "models.model",
"fqn": ["test", "not_null_model_id"],
"metrics": [],
"entities": [],
"name": "not_null_model_id",
"original_file_path": model_schema_yml_path,
"package_name": "test",
@@ -591,6 +594,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"extra_ctes_injected": True,
"fqn": ["test", "snapshot_seed", "snapshot_seed"],
"metrics": [],
"entities": [],
"meta": {},
"name": "snapshot_seed",
"original_file_path": snapshot_path,
@@ -637,6 +641,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"file_key_name": "models.model",
"fqn": ["test", "test_nothing_model_"],
"metrics": [],
"entities": [],
"name": "test_nothing_model_",
"original_file_path": model_schema_yml_path,
"package_name": "test",
@@ -690,6 +695,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"file_key_name": "models.model",
"fqn": ["test", "unique_model_id"],
"metrics": [],
"entities": [],
"name": "unique_model_id",
"original_file_path": model_schema_yml_path,
"package_name": "test",
@@ -793,6 +799,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"maturity": "medium",
"meta": {"tool": "my_tool", "languages": ["python"]},
"metrics": [],
"entities": [],
"tags": ["my_department"],
"name": "notebook_exposure",
"original_file_path": os.path.join("models", "schema.yml"),
@@ -820,6 +827,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
},
"fqn": ["test", "simple_exposure"],
"metrics": [],
"entities": [],
"name": "simple_exposure",
"original_file_path": os.path.join("models", "schema.yml"),
"owner": {
@@ -841,6 +849,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
},
},
"metrics": {},
"entities": {},
"groups": {},
"selectors": {},
"parent_map": {
@@ -926,6 +935,7 @@ def expected_references_manifest(project):
"fqn": ["test", "ephemeral_copy"],
"group": None,
"metrics": [],
"entities": [],
"name": "ephemeral_copy",
"original_file_path": ephemeral_copy_path,
"package_name": "test",
@@ -987,6 +997,7 @@ def expected_references_manifest(project):
"fqn": ["test", "ephemeral_summary"],
"group": "test_group",
"metrics": [],
"entities": [],
"name": "ephemeral_summary",
"original_file_path": ephemeral_summary_path,
"package_name": "test",
@@ -1051,6 +1062,7 @@ def expected_references_manifest(project):
"fqn": ["test", "view_summary"],
"group": None,
"metrics": [],
"entities": [],
"name": "view_summary",
"original_file_path": view_summary_path,
"package_name": "test",
@@ -1175,6 +1187,7 @@ def expected_references_manifest(project):
"fqn": ["test", "snapshot_seed", "snapshot_seed"],
"group": None,
"metrics": [],
"entities": [],
"meta": {},
"name": "snapshot_seed",
"original_file_path": snapshot_path,
@@ -1263,6 +1276,7 @@ def expected_references_manifest(project):
"maturity": "medium",
"meta": {"tool": "my_tool", "languages": ["python"]},
"metrics": [],
"entities": [],
"tags": ["my_department"],
"name": "notebook_exposure",
"original_file_path": os.path.join("models", "schema.yml"),
@@ -1279,6 +1293,7 @@ def expected_references_manifest(project):
},
},
"metrics": {},
"entities": {},
"groups": {
"group.test.test_group": {
"name": "test_group",

View File

@@ -362,6 +362,7 @@ def verify_manifest(project, expected_manifest, start_time, manifest_schema_path
"child_map",
"group_map",
"metrics",
"entities",
"groups",
"docs",
"metadata",
@@ -389,7 +390,7 @@ def verify_manifest(project, expected_manifest, start_time, manifest_schema_path
and metadata["send_anonymous_usage_stats"] is False
)
assert "adapter_type" in metadata and metadata["adapter_type"] == project.adapter_type
elif key in ["nodes", "sources", "exposures", "metrics", "disabled", "docs"]:
elif key in ["nodes", "sources", "exposures", "metrics", "entities", "disabled", "docs"]:
for unique_id, node in expected_manifest[key].items():
assert unique_id in manifest[key]
assert manifest[key][unique_id] == node, f"{unique_id} did not match"

View File

@@ -318,10 +318,10 @@ class TestPreviousVersionState:
def test_backwards_compatible_versions(self, project):
# manifest schema version 4 and greater should always be forward compatible
for schema_version in range(4, self.CURRENT_EXPECTED_MANIFEST_VERSION):
for schema_version in range(8, self.CURRENT_EXPECTED_MANIFEST_VERSION):
self.compare_previous_state(project, schema_version, True)
def test_nonbackwards_compatible_versions(self, project):
# schema versions 1, 2, 3 are all not forward compatible
for schema_version in range(1, 4):
for schema_version in range(1, 7):
self.compare_previous_state(project, schema_version, False)