Compare commits

...

5 Commits

Author SHA1 Message Date
Quigley Malcolm
0fadc4d9ec Update _sort_values, build_node_edges, and build_macro_edges to use sets
This is just a companion commit to the previous commit. It isn't strictly necessary,
and if it is considered too dangerous, it can be dropped from the PR if need be.
2024-01-25 17:38:17 -08:00
Quigley Malcolm
8afcd0bb51 Fix the shallow copy creation of the depends_on_nodes property in build_node_edges
This was necessary after converting `depends_on.nodes`, `depends_on.macros`,
`depends_on_macros`, and `depends_on_nodes` to use sets instead of lists. Doing so
fixes 9 of the 13 unit tests that broke with the switch to sets.
2024-01-25 17:33:39 -08:00
Quigley Malcolm
9f47565514 Fix return typing of depends_on_nodes and depends_on_macros 2024-01-25 17:13:14 -08:00
Quigley Malcolm
942a5397cc Refactor MacroDependsOn to use a set for nodes property 2024-01-25 17:07:03 -08:00
Quigley Malcolm
be8ae155d6 Refactor DependsOn to use a set for nodes property 2024-01-25 17:04:55 -08:00
5 changed files with 40 additions and 47 deletions

View File

@@ -71,7 +71,7 @@ class MacroGenerator(CallableMacroGenerator):
depth = self.stack.depth
# only mark depth=0 as a dependency, when creating this dependency we don't pass in stack
if depth == 0 and self.node:
self.node.depends_on.add_macro(unique_id)
self.node.depends_on.macros.add(unique_id)
self.stack.push(unique_id)
try:
yield

View File

@@ -455,38 +455,40 @@ def _packages_to_search(
return [current_project, node_package, None]
def _sort_values(dct):
def _sort_values(dct: Dict[str, Set[str]]) -> Dict[str, List[str]]:
"""Given a dictionary, sort each value. This makes output deterministic,
which helps for tests.
"""
return {k: sorted(v) for k, v in dct.items()}
return {k: sorted(list(v)) for k, v in dct.items()}
def build_node_edges(nodes: List[ManifestNode]):
def build_node_edges(
nodes: List[ManifestNode],
) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
"""Build the forward and backward edges on the given list of ManifestNodes
and return them as two separate dictionaries, each mapping unique IDs to
lists of edges.
"""
backward_edges: Dict[str, List[str]] = {}
backward_edges: Dict[str, Set[str]] = {}
# pre-populate the forward edge dict for simplicity
forward_edges: Dict[str, List[str]] = {n.unique_id: [] for n in nodes}
forward_edges: Dict[str, Set[str]] = {n.unique_id: set() for n in nodes}
for node in nodes:
backward_edges[node.unique_id] = node.depends_on_nodes[:]
backward_edges[node.unique_id] = node.depends_on_nodes.copy()
for unique_id in backward_edges[node.unique_id]:
if unique_id in forward_edges.keys():
forward_edges[unique_id].append(node.unique_id)
forward_edges[unique_id].add(node.unique_id)
return _sort_values(forward_edges), _sort_values(backward_edges)
# Build a map of children of macros and generic tests
def build_macro_edges(nodes: List[Any]):
forward_edges: Dict[str, List[str]] = {
n.unique_id: [] for n in nodes if n.unique_id.startswith("macro") or n.depends_on_macros
forward_edges: Dict[str, Set[str]] = {
n.unique_id: set() for n in nodes if n.unique_id.startswith("macro") or n.depends_on_macros
}
for node in nodes:
for unique_id in node.depends_on_macros:
if unique_id in forward_edges.keys():
forward_edges[unique_id].append(node.unique_id)
forward_edges[unique_id].add(node.unique_id)
return _sort_values(forward_edges)

View File

@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
import hashlib
from mashumaro.types import SerializableType
from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Literal
from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Literal, Set
from dbt import deprecations
from dbt_common.contracts.constraints import (
@@ -244,12 +244,7 @@ class HasRelationMetadata(dbtClassMixin, Replaceable):
class MacroDependsOn(dbtClassMixin, Replaceable):
"""Used only in the Macro class"""
macros: List[str] = field(default_factory=list)
# 'in' on lists is O(n) so this is O(n^2) for # of macros
def add_macro(self, value: str):
if value not in self.macros:
self.macros.append(value)
macros: Set[str] = field(default_factory=set)
@dataclass
@@ -264,11 +259,7 @@ class DeferRelation(HasRelationMetadata):
@dataclass
class DependsOn(MacroDependsOn):
nodes: List[str] = field(default_factory=list)
def add_node(self, value: str):
if value not in self.nodes:
self.nodes.append(value)
nodes: Set[str] = field(default_factory=set)
@dataclass
@@ -530,11 +521,11 @@ class CompiledNode(ParsedNode):
return dct
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@@ -594,7 +585,7 @@ class ModelNode(CompiledNode):
original_file_path="",
path="",
unrendered_config=unrendered_config,
depends_on=DependsOn(nodes=args.depends_on_nodes),
depends_on=DependsOn(nodes=set(args.depends_on_nodes)),
config=ModelConfig(enabled=args.enabled),
)
@@ -944,11 +935,11 @@ Error raised for '{self.unique_id}', which has these hooks defined: \n{hook_list
return self.same_seeds(other)
@property
def depends_on_nodes(self):
return []
def depends_on_nodes(self) -> Set[str]:
return set()
@property
def depends_on_macros(self) -> List[str]:
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@property
@@ -1086,7 +1077,7 @@ class UnitTestDefinition(NodeInfoMixin, GraphNode, UnitTestDefinitionMandatory):
return self.original_file_path
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1169,7 +1160,7 @@ class Macro(BaseNode):
return self.macro_sql == other.macro_sql
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@@ -1391,8 +1382,8 @@ class SourceDefinition(NodeInfoMixin, ParsedSourceMandatory):
return False
@property
def depends_on_nodes(self):
return []
def depends_on_nodes(self) -> Set[str]:
return set()
@property
def depends_on(self):
@@ -1444,7 +1435,7 @@ class Exposure(GraphNode):
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1594,7 +1585,7 @@ class Metric(GraphNode):
group: Optional[str] = None
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1749,11 +1740,11 @@ class SemanticModel(GraphNode):
return SemanticModelReference(semantic_model_name=self.name)
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
def checked_agg_time_dimension_for_measure(
@@ -1867,7 +1858,7 @@ class SavedQuery(NodeInfoMixin, SavedQueryMandatory):
return self.query_params.metrics
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
def same_metrics(self, old: "SavedQuery") -> bool:

View File

@@ -744,7 +744,7 @@ class ManifestLoader:
package_name, macro_name = macro_name.split(".")
dep_macro_id = self.macro_resolver.get_macro_id(package_name, macro_name)
if dep_macro_id:
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
macro.depends_on.macros.add(dep_macro_id) # will check for dupes
def write_manifest_for_partial_parse(self):
path = os.path.join(self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME)
@@ -1532,7 +1532,7 @@ def _process_refs(
)
target_model_id = target_model.unique_id
node.depends_on.add_node(target_model_id)
node.depends_on.nodes.add(target_model_id)
def _process_metric_depends_on(
@@ -1560,7 +1560,7 @@ def _process_metric_depends_on(
node=metric,
)
metric.depends_on.add_node(target_semantic_model.unique_id)
metric.depends_on.nodes.add(target_semantic_model.unique_id)
def _process_metric_node(
@@ -1628,7 +1628,7 @@ def _process_metric_node(
manifest=manifest, current_project=current_project, metric=target_metric
)
metric.type_params.input_measures.extend(target_metric.type_params.input_measures)
metric.depends_on.add_node(target_metric.unique_id)
metric.depends_on.nodes.add(target_metric.unique_id)
else:
assert_values_exhausted(metric.type)
@@ -1684,7 +1684,7 @@ def _process_metrics_for_node(
target_metric_id = target_metric.unique_id
node.depends_on.add_node(target_metric_id)
node.depends_on.nodes.add(target_metric_id)
def remove_dependent_project_references(manifest, external_node_unique_id):
@@ -1715,7 +1715,7 @@ def _process_sources_for_exposure(manifest: Manifest, current_project: str, expo
)
continue
target_source_id = target_source.unique_id
exposure.depends_on.add_node(target_source_id)
exposure.depends_on.nodes.add(target_source_id)
def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
@@ -1737,7 +1737,7 @@ def _process_sources_for_metric(manifest: Manifest, current_project: str, metric
)
continue
target_source_id = target_source.unique_id
metric.depends_on.add_node(target_source_id)
metric.depends_on.nodes.add(target_source_id)
def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
@@ -1764,7 +1764,7 @@ def _process_sources_for_node(manifest: Manifest, current_project: str, node: Ma
)
continue
target_source_id = target_source.unique_id
node.depends_on.add_node(target_source_id)
node.depends_on.nodes.add(target_source_id)
# This is called in task.rpc.sql_commands when a "dynamic" node is

View File

@@ -273,7 +273,7 @@ class SchemaGenericTestParser(SimpleParser):
)
# Add the depends_on here so we can limit the macros added
# to the context in rendering processing
node.depends_on.add_macro(macro_unique_id)
node.depends_on.macros.add(macro_unique_id)
if macro_unique_id in ["macro.dbt.test_not_null", "macro.dbt.test_unique"]:
config_call_dict = builder.get_static_config()
config._config_call_dict = config_call_dict