Compare commits

...

6 Commits

Author SHA1 Message Date
Dave Connors
5a2eef5f2f parsed update 2022-10-06 08:42:39 -05:00
Dave Connors
35dec44a81 none of this works! 2022-10-05 10:24:30 -05:00
Dave Connors
1aef8c0317 Merge branch 'sketch-metric-entities' into feature/model-entity-config 2022-10-05 08:19:44 -05:00
Dave Connors
54e0925706 add is entity as top level model attribute, is_entity_dimension and is_primary_key as column attributes 2022-10-04 17:08:25 -05:00
Dave Connors
a7b50a0762 additional entity work 2022-10-03 14:21:41 -05:00
Dave Connors
cdb823b0b9 start to build node types 2022-10-03 11:01:08 -05:00
9 changed files with 281 additions and 0 deletions

View File

@@ -56,6 +56,7 @@ def print_compile_stats(stats):
NodeType.Source: "source",
NodeType.Exposure: "exposure",
NodeType.Metric: "metric",
NodeType.Entity: "entity",
}
results = {k: 0 for k in names.keys()}
@@ -431,6 +432,8 @@ class Compiler:
self.link_node(linker, exposure, manifest)
for metric in manifest.metrics.values():
self.link_node(linker, metric, manifest)
for entity in manifest.entities.values():
self.link_node(linker, entity, manifest)
cycle = linker.find_cycles()

View File

@@ -229,6 +229,7 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
entities: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.

View File

@@ -7,6 +7,7 @@ from dbt.contracts.graph.parsed import (
ParsedModelNode,
ParsedExposure,
ParsedMetric,
ParsedEntity,
ParsedResource,
ParsedRPCNode,
ParsedSqlNode,
@@ -232,4 +233,5 @@ GraphMemberNode = Union[
CompileResultNode,
ParsedExposure,
ParsedMetric,
ParsedEntity,
]

View File

@@ -36,6 +36,7 @@ from dbt.contracts.graph.parsed import (
ParsedGenericTestNode,
ParsedExposure,
ParsedMetric,
ParsedEntity,
HasUniqueID,
UnpatchedSourceDefinition,
ManifestNodes,
@@ -217,6 +218,39 @@ class MetricLookup(dbtClassMixin):
return manifest.metrics[unique_id]
class EntityLookup(dbtClassMixin):
    """Name-based index over the entities held by a manifest.

    ``storage`` maps an entity's search name to a ``{package_name: unique_id}``
    dict; it is filled from ``manifest.entities`` when the lookup is built.
    """

    def __init__(self, manifest: "Manifest"):
        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, search_name, package: Optional[PackageName]):
        """Resolve ``search_name``/``package`` to a unique_id via
        ``find_unique_id_for_package`` over this lookup's storage."""
        return find_unique_id_for_package(self.storage, search_name, package)

    def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
        """Return the matching entity from ``manifest``, or None when no
        unique_id is indexed for the given name/package."""
        unique_id = self.get_unique_id(search_name, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_entity(self, entity: ParsedEntity):
        """Index ``entity`` under its search name and package name."""
        if entity.search_name not in self.storage:
            self.storage[entity.search_name] = {}

        self.storage[entity.search_name][entity.package_name] = entity.unique_id

    def populate(self, manifest):
        """Index every entity in ``manifest.entities`` that has a ``name``."""
        for entity in manifest.entities.values():
            if hasattr(entity, "name"):
                self.add_entity(entity)

    def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> ParsedEntity:
        """Fetch ``unique_id`` from ``manifest.entities``.

        Raises:
            dbt.exceptions.InternalException: the id is indexed here but
                missing from the manifest (index and manifest out of sync).
        """
        if unique_id not in manifest.entities:
            # The message names the resource kind actually being looked up;
            # it previously said "Metric" (copied from MetricLookup).
            raise dbt.exceptions.InternalException(
                f"Entity {unique_id} found in cache but not found in manifest"
            )
        return manifest.entities[unique_id]
# This handles both models/seeds/snapshots and sources/metrics/exposures
class DisabledLookup(dbtClassMixin):
def __init__(self, manifest: "Manifest"):
@@ -466,6 +500,7 @@ class Disabled(Generic[D]):
MaybeMetricNode = Optional[Union[ParsedMetric, Disabled[ParsedMetric]]]
MaybeEntityNode = Optional[Union[ParsedEntity, Disabled[ParsedEntity]]]
MaybeDocumentation = Optional[ParsedDocumentation]
@@ -611,6 +646,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs: MutableMapping[str, ParsedDocumentation] = field(default_factory=dict)
exposures: MutableMapping[str, ParsedExposure] = field(default_factory=dict)
metrics: MutableMapping[str, ParsedMetric] = field(default_factory=dict)
entities: MutableMapping[str, ParsedEntity] = field(default_factory=dict)
selectors: MutableMapping[str, Any] = field(default_factory=dict)
files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
metadata: ManifestMetadata = field(default_factory=ManifestMetadata)
@@ -632,6 +668,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_entity_lookup: Optional[EntityLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_disabled_lookup: Optional[DisabledLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
@@ -682,6 +721,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
def update_metric(self, new_metric: ParsedMetric):
_update_into(self.metrics, new_metric)
def update_entity(self, new_entity: ParsedEntity):
    """Apply ``new_entity`` to this manifest's ``entities`` mapping.

    The work is delegated to the ``_update_into`` helper, the same helper
    the neighbouring ``update_metric`` / ``update_node`` methods use.
    """
    _update_into(self.entities, new_entity)
def update_node(self, new_node: ManifestNode):
_update_into(self.nodes, new_node)
@@ -699,6 +741,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
"metrics": {k: v.to_dict(omit_none=False) for k, v in self.metrics.items()},
"nodes": {k: v.to_dict(omit_none=False) for k, v in self.nodes.items()},
"sources": {k: v.to_dict(omit_none=False) for k, v in self.sources.items()},
"entities": {k: v.to_dict(omit_none=False) for k, v in self.entities.items()},
}
def build_disabled_by_file_id(self):
@@ -759,6 +802,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.nodes.values(),
self.sources.values(),
self.metrics.values(),
self.entities.values(),
)
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
@@ -787,6 +831,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs={k: _deepcopy(v) for k, v in self.docs.items()},
exposures={k: _deepcopy(v) for k, v in self.exposures.items()},
metrics={k: _deepcopy(v) for k, v in self.metrics.items()},
entities={k: _deepcopy(v) for k, v in self.entities.items()},
selectors={k: _deepcopy(v) for k, v in self.selectors.items()},
metadata=self.metadata,
disabled={k: _deepcopy(v) for k, v in self.disabled.items()},
@@ -801,6 +846,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.sources.values(),
self.exposures.values(),
self.metrics.values(),
self.entities.values(),
)
)
forward_edges, backward_edges = build_node_edges(edge_members)
@@ -826,6 +872,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
docs=self.docs,
exposures=self.exposures,
metrics=self.metrics,
entities=self.entities,
selectors=self.selectors,
metadata=self.metadata,
disabled=self.disabled,
@@ -847,6 +894,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return self.exposures[unique_id]
elif unique_id in self.metrics:
return self.metrics[unique_id]
elif unique_id in self.entities:
return self.entities[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.InternalException(
@@ -883,6 +932,12 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self._metric_lookup = MetricLookup(self)
return self._metric_lookup
@property
def entity_lookup(self) -> EntityLookup:
    """Return the EntityLookup for this manifest, building it on first use."""
    # Built lazily and kept in _entity_lookup so later accesses reuse it.
    if self._entity_lookup is None:
        self._entity_lookup = EntityLookup(self)
    return self._entity_lookup
def rebuild_ref_lookup(self):
self._ref_lookup = RefableLookup(self)
@@ -983,6 +1038,31 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
return Disabled(disabled[0])
return None
def resolve_entity(
    self,
    target_entity_name: str,
    target_entity_package: Optional[str],
    current_project: str,
    node_package: str,
) -> MaybeEntityNode:
    """Find the entity named ``target_entity_name`` for a referencing node.

    Returns the first enabled entity found across the candidate packages,
    otherwise a ``Disabled`` wrapper around the first disabled match, or
    None when nothing matches.
    """

    entity: Optional[ParsedEntity] = None
    disabled: Optional[List[ParsedEntity]] = None

    # Candidate packages come from _search_packages and are tried in the
    # order it returns them.
    candidates = _search_packages(current_project, node_package, target_entity_package)
    for pkg in candidates:
        entity = self.entity_lookup.find(target_entity_name, pkg, self)

        # An enabled hit wins immediately.
        if entity is not None and entity.config.enabled:
            return entity

        # it's possible that the node is disabled
        # Only consult the disabled lookup while no earlier package has
        # produced a non-None result.
        if disabled is None:
            disabled = self.disabled_lookup.find(f"{target_entity_name}", pkg)
    # No enabled entity in any candidate package: report the disabled one, if any.
    if disabled:
        return Disabled(disabled[0])
    return None
# Called by DocsRuntimeContext.doc
def resolve_doc(
self,
@@ -1108,6 +1188,11 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.metrics[metric.unique_id] = metric
source_file.metrics.append(metric.unique_id)
def add_entity(self, source_file: SchemaSourceFile, entity: ParsedEntity):
    """Register ``entity`` in the manifest and record it on ``source_file``."""
    # _check_duplicates runs against the existing entities before insertion
    # (presumably raising on a unique_id clash -- confirm in its definition).
    _check_duplicates(entity, self.entities)
    self.entities[entity.unique_id] = entity
    # Track the unique_id on the schema file that declared the entity.
    source_file.entities.append(entity.unique_id)
def add_disabled_nofile(self, node: GraphMemberNode):
# There can be multiple disabled nodes for the same unique_id
if node.unique_id in self.disabled:
@@ -1123,6 +1208,8 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
source_file.add_test(node.unique_id, test_from)
if isinstance(node, ParsedMetric):
source_file.metrics.append(node.unique_id)
if isinstance(node, ParsedEntity):
source_file.entities.append(node.unique_id)
if isinstance(node, ParsedExposure):
source_file.exposures.append(node.unique_id)
else:
@@ -1150,6 +1237,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
self.docs,
self.exposures,
self.metrics,
self.entities,
self.selectors,
self.files,
self.metadata,
@@ -1203,6 +1291,9 @@ class WritableManifest(ArtifactMixin):
metrics: Mapping[UniqueID, ParsedMetric] = field(
metadata=dict(description=("The metrics defined in the dbt project and its dependencies"))
)
entities: Mapping[UniqueID, ParsedEntity] = field(
metadata=dict(description=("The entities defined in the dbt project and its dependencies"))
)
selectors: Mapping[UniqueID, Any] = field(
metadata=dict(description=("The selectors defined in selectors.yml"))
)

View File

@@ -368,6 +368,11 @@ class MetricConfig(BaseConfig):
enabled: bool = True
@dataclass
class EntityConfig(BaseConfig):
    """Config for entity resources; only ``enabled`` is declared here."""

    # Entities are enabled unless configured otherwise.
    enabled: bool = True
@dataclass
class ExposureConfig(BaseConfig):
enabled: bool = True
@@ -624,6 +629,7 @@ class SnapshotConfig(EmptySnapshotConfig):
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
NodeType.Metric: MetricConfig,
NodeType.Entity: EntityConfig,
NodeType.Exposure: ExposureConfig,
NodeType.Source: SourceConfig,
NodeType.Seed: SeedConfig,

View File

@@ -38,6 +38,8 @@ from dbt.contracts.graph.unparsed import (
MaturityType,
MetricFilter,
MetricTime,
UnparsedEntity,
EntityRelationship,
)
from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin
from dbt.exceptions import warn_or_error
@@ -54,6 +56,7 @@ from .model_config import (
ExposureConfig,
EmptySnapshotConfig,
SnapshotConfig,
EntityConfig,
)
@@ -64,6 +67,8 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable
meta: Dict[str, Any] = field(default_factory=dict)
data_type: Optional[str] = None
quote: Optional[bool] = None
is_entity_dimension: Optional[bool] = False
is_primary_key: Optional[bool] = False
tags: List[str] = field(default_factory=list)
_extra: Dict[str, Any] = field(default_factory=dict)
@@ -160,6 +165,7 @@ class ParsedNodeMixins(dbtClassMixin):
self.created_at = time.time()
self.description = patch.description
self.columns = patch.columns
self.is_entity = patch.is_entity
def get_materialization(self):
return self.config.materialized
@@ -176,6 +182,17 @@ class ParsedNodeMandatory(UnparsedNode, HasUniqueID, HasFqn, HasRelationMetadata
return self.alias
@dataclass
class ParsedEntityMandatory(UnparsedEntity, HasUniqueID, HasFqn, Replaceable):
    """Required attributes of a parsed entity, on top of UnparsedEntity."""

    # alias and checksum have no defaults, so they must be supplied.
    alias: str
    checksum: FileHash
    config: EntityConfig = field(default_factory=EntityConfig)

    @property
    def identifier(self):
        # The identifier is simply the alias.
        return self.alias
@dataclass
class NodeInfoMixin:
_event_status: Dict[str, Any] = field(default_factory=dict)
@@ -210,6 +227,7 @@ class ParsedNodeDefaults(NodeInfoMixin, ParsedNodeMandatory):
compiled_path: Optional[str] = None
build_path: Optional[str] = None
deferred: bool = False
is_entity: Optional[bool] = False
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
@@ -227,6 +245,73 @@ class ParsedNodeDefaults(NodeInfoMixin, ParsedNodeMandatory):
return full_path
class ParsedEntityMixins(dbtClassMixin):
    """Behaviour shared by parsed entities: dependency access and patching."""

    # Attributes read by the methods below; declared as annotations only.
    resource_type: NodeType
    depends_on: DependsOn
    config: EntityConfig

    @property
    def is_refable(self):
        # True when this resource type is listed in NodeType.refable().
        return self.resource_type in NodeType.refable()

    # The nodes recorded on this entity's depends_on.
    @property
    def depends_on_nodes(self):
        return self.depends_on.nodes

    def patch(self, patch: "ParsedEntityPatch"):
        """Given a ParsedEntityPatch, add the new information to the node."""
        # explicitly pick out the parts to update so we don't inadvertently
        # step on the model name or anything
        # Note: config should already be updated
        self.patch_path: Optional[str] = patch.file_id
        # update created_at so process_docs will run in partial parsing
        self.created_at = time.time()
        self.description = patch.description
        self.columns = patch.columns
        self.is_entity = patch.is_entity
        self.relationships = patch.relationships

    def get_materialization(self):
        # NOTE(review): EntityConfig as declared alongside this class only
        # defines `enabled`; confirm `materialized` is actually available on
        # entity configs before relying on this accessor.
        return self.config.materialized
@dataclass
class EntityInfoMixin:
    """Mixin exposing a small summary dict describing an entity."""

    @property
    def entity_info(self):
        """Return path, name, unique id and resource type of this object.

        Attributes missing from the instance are reported as None, except
        ``resource_type``, which falls back to the empty string.
        """
        # (output key, attribute name) pairs for the values copied verbatim.
        copied = (
            ("entity_path", "path"),
            ("entity_name", "name"),
            ("unique_id", "unique_id"),
        )
        summary = {key: getattr(self, attr, None) for key, attr in copied}
        # resource_type is always rendered as a string.
        summary["resource_type"] = str(getattr(self, "resource_type", ""))
        return summary
@dataclass
class ParsedEntityDefaults(EntityInfoMixin, ParsedEntityMandatory):
    """Optional attributes of a parsed entity; every field has a default."""

    tags: List[str] = field(default_factory=list)
    # References collected for this entity, each stored as a list of strings.
    refs: List[List[str]] = field(default_factory=list)
    sources: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    depends_on: DependsOn = field(default_factory=DependsOn)
    description: str = field(default="")
    columns: Dict[str, ColumnInfo] = field(default_factory=dict)
    meta: Dict[str, Any] = field(default_factory=dict)
    docs: Docs = field(default_factory=Docs)
    patch_path: Optional[str] = None
    compiled_path: Optional[str] = None
    build_path: Optional[str] = None
    deferred: bool = False
    # Unlike ParsedNodeDefaults.is_entity, this flag defaults to True.
    is_entity: Optional[bool] = True
    relationships: Optional[List[EntityRelationship]] = None
    unrendered_config: Dict[str, Any] = field(default_factory=dict)
    # Stamped with the current time when the instance is created.
    created_at: float = field(default_factory=lambda: time.time())
    config_call_dict: Dict[str, Any] = field(default_factory=dict)
T = TypeVar("T", bound="ParsedNode")
@@ -497,6 +582,12 @@ class ParsedPatch(HasYamlMetadata, Replaceable):
@dataclass
class ParsedNodePatch(ParsedPatch):
    """Patch data for a node: its columns plus the is_entity flag."""

    columns: Dict[str, ColumnInfo]
    # No default is declared, so is_entity must be passed when constructing a patch.
    is_entity: Optional[bool]
@dataclass
class ParsedEntityPatch(ParsedNodePatch):
    """Node patch extended with the entity's relationships."""

    # No default is declared, so relationships must be passed (None is allowed).
    relationships: Optional[List[EntityRelationship]]
@dataclass
@@ -909,6 +1000,44 @@ class ParsedMetric(UnparsedBaseNode, HasUniqueID, HasFqn):
)
@dataclass
class ParsedEntity(ParsedEntityMixins, ParsedEntityDefaults, SerializableType):
    """A parsed entity resource.

    ``UnparsedEntity`` is intentionally not listed as a direct base: it is
    already inherited through ``ParsedEntityDefaults`` ->
    ``ParsedEntityMandatory``, and naming it ahead of that subclass leaves
    Python unable to compute a consistent MRO (TypeError at class creation).

    ``resource_type`` is likewise not redeclared with a default here. It is
    inherited from ``UnparsedEntity`` (restricted to ``NodeType.Entity``) and
    sits before the required ``alias``/``checksum`` fields, so giving it a
    default would make the dataclass reject the field order. Callers supply
    ``resource_type=NodeType.Entity`` explicitly.
    """

    name: str
    relationships: List[EntityRelationship] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    config: EntityConfig = field(default_factory=EntityConfig)
    unrendered_config: Dict[str, Any] = field(default_factory=dict)
    depends_on: DependsOn = field(default_factory=DependsOn)
    refs: List[List[str]] = field(default_factory=list)
    created_at: float = field(default_factory=lambda: time.time())

    @property
    def depends_on_nodes(self):
        """The nodes recorded on this entity's ``depends_on``."""
        return self.depends_on.nodes

    @property
    def search_name(self):
        """Name under which this entity is indexed for lookups."""
        return self.name

    def same_relationships(self, old: "ParsedEntity") -> bool:
        """True when both entities declare equal relationships."""
        return self.relationships == old.relationships

    def same_config(self, old: "ParsedEntity") -> bool:
        """True when the unrendered configs have the same contents."""
        return self.config.same_contents(
            self.unrendered_config,
            old.unrendered_config,
        )

    def same_contents(self, old: Optional["ParsedEntity"]) -> bool:
        """Compare this entity against a previous version of itself.

        Only relationships are compared; ``same_config`` is not consulted.
        """
        # existing when it didn't before is a change!
        # metadata/tags changes are not "changes"
        # NOTE(review): the branch below returns True ("same") for a missing
        # old entity, which contradicts the comment above. Behaviour is kept
        # as-is; confirm the intended result with the callers.
        if old is None:
            return True

        return self.same_relationships(old)
ManifestNodes = Union[
ParsedAnalysisNode,
ParsedSingularTestNode,
@@ -928,5 +1057,6 @@ ParsedResource = Union[
ParsedNode,
ParsedExposure,
ParsedMetric,
ParsedEntity,
ParsedSourceDefinition,
]

View File

@@ -53,6 +53,12 @@ class UnparsedGenericTest(UnparsedBaseNode, HasCode):
resource_type: NodeType = field(metadata={"restrict": [NodeType.Macro]})
@dataclass
class UnparsedEntity(UnparsedBaseNode):
    """An entity as first read, before parsing adds further attributes."""

    name: str
    # The field metadata restricts resource_type to NodeType.Entity.
    resource_type: NodeType = field(metadata={"restrict": [NodeType.Entity]})
@dataclass
class UnparsedNode(UnparsedBaseNode, HasCode):
name: str
@@ -88,11 +94,42 @@ class Docs(dbtClassMixin, Replaceable):
node_color: Optional[str] = None
@dataclass
class EntityJoinType(StrEnum):
left_outer = "left_outer"
inner = "inner"
@dataclass
class EntityRelationshipType(StrEnum):
one_to_many = "one_to_many"
one_to_one = "one_to_one"
many_to_one = "many_to_one"
def reverse(self) -> str:
if self == "many_to_one":
return "one_to_many"
elif self == "one_to_many":
return "many_to_one"
else:
return self
@dataclass
class EntityRelationship(dbtClassMixin, Mergeable):
    """A declared relationship from one entity to another.

    Attributes:
        to: the target of the relationship.
        join_key: the key used for the join.
        relationship_type: cardinality of the relationship (required).
        join_type: optional join type; None when not specified.
    """

    to: str
    join_key: str
    # The earlier defaults used the enum classes themselves as
    # default_factory. Calling an Enum class without a value raises
    # TypeError, so those defaults could never be produced. The cardinality
    # has no obvious default and is therefore required; an unspecified
    # join type is represented as None.
    relationship_type: EntityRelationshipType
    join_type: Optional[EntityJoinType] = None
@dataclass
class HasDocs(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable):
name: str
description: str = ""
meta: Dict[str, Any] = field(default_factory=dict)
is_entity: Optional[bool] = False
relationships: List[EntityRelationship] = field(default_factory=list)
data_type: Optional[str] = None
docs: Docs = field(default_factory=Docs)
_extra: Dict[str, Any] = field(default_factory=dict)
@@ -152,6 +189,11 @@ class UnparsedNodeUpdate(HasConfig, HasColumnTests, HasTests, HasYamlMetadata):
quote_columns: Optional[bool] = None
@dataclass
class UnparsedEntityUpdate(HasConfig, HasColumnTests, HasDocs, HasYamlMetadata):
    """Update payload for an entity.

    Adds no fields of its own; everything comes from the listed base classes.
    """

    pass
@dataclass
class MacroArgument(dbtClassMixin):
name: str

View File

@@ -18,6 +18,7 @@ class NodeType(StrEnum):
Macro = "macro"
Exposure = "exposure"
Metric = "metric"
Entity = "entity"
@classmethod
def executable(cls) -> List["NodeType"]:
@@ -52,11 +53,14 @@ class NodeType(StrEnum):
cls.Analysis,
cls.Exposure,
cls.Metric,
cls.Entity,
]
def pluralize(self) -> str:
if self is self.Analysis:
return "analyses"
elif self is self.Entity:
return "entities"
return f"{self}s"

View File

@@ -866,6 +866,8 @@ class NodePatchParser(NonSourceParser[NodeTarget, ParsedNodePatch], Generic[Node
meta=block.target.meta,
docs=block.target.docs,
config=block.target.config,
is_entity=block.target.is_entity,
realtionships=block.target.relationships,
)
assert isinstance(self.yaml.file, SchemaSourceFile)
source_file: SchemaSourceFile = self.yaml.file