Compare commits

...

5 Commits

Author SHA1 Message Date
Quigley Malcolm
0fadc4d9ec Update _sort_values, build_node_edges, and build_macro_edges to use sets
This is just a companion commit to the previous commit. It isn't strictly necessary,
and if it is considered too dangerous, it can be dropped from the PR if need be.
2024-01-25 17:38:17 -08:00
Quigley Malcolm
8afcd0bb51 Fix the shallow copy creation of the depends_on_nodes property in build_node_edges
This was necessary after converting `depends_on.nodes`, `depends_on.macros`,
`depends_on_macros`, and `depends_on_nodes` to use sets instead of lists. Doing so
fixes 9 of the 13 unit tests that broke with the switch to sets.
2024-01-25 17:33:39 -08:00
Quigley Malcolm
9f47565514 Fix return typing of depends_on_nodes and depends_on_macros 2024-01-25 17:13:14 -08:00
Quigley Malcolm
942a5397cc Refactor MacroDependsOn to use a set for nodes property 2024-01-25 17:07:03 -08:00
Quigley Malcolm
be8ae155d6 Refactor DependsOn to use a set for nodes property 2024-01-25 17:04:55 -08:00
5 changed files with 40 additions and 47 deletions

View File

@@ -71,7 +71,7 @@ class MacroGenerator(CallableMacroGenerator):
depth = self.stack.depth
# only mark depth=0 as a dependency, when creating this dependency we don't pass in stack
if depth == 0 and self.node:
self.node.depends_on.add_macro(unique_id)
self.node.depends_on.macros.add(unique_id)
self.stack.push(unique_id)
try:
yield

View File

@@ -455,38 +455,40 @@ def _packages_to_search(
return [current_project, node_package, None]
def _sort_values(dct):
def _sort_values(dct: Dict[str, Set[str]]) -> Dict[str, List[str]]:
"""Given a dictionary, sort each value. This makes output deterministic,
which helps for tests.
"""
return {k: sorted(v) for k, v in dct.items()}
return {k: sorted(list(v)) for k, v in dct.items()}
def build_node_edges(nodes: List[ManifestNode]):
def build_node_edges(
nodes: List[ManifestNode],
) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
"""Build the forward and backward edges on the given list of ManifestNodes
and return them as two separate dictionaries, each mapping unique IDs to
lists of edges.
"""
backward_edges: Dict[str, List[str]] = {}
backward_edges: Dict[str, Set[str]] = {}
# pre-populate the forward edge dict for simplicity
forward_edges: Dict[str, List[str]] = {n.unique_id: [] for n in nodes}
forward_edges: Dict[str, Set[str]] = {n.unique_id: set() for n in nodes}
for node in nodes:
backward_edges[node.unique_id] = node.depends_on_nodes[:]
backward_edges[node.unique_id] = node.depends_on_nodes.copy()
for unique_id in backward_edges[node.unique_id]:
if unique_id in forward_edges.keys():
forward_edges[unique_id].append(node.unique_id)
forward_edges[unique_id].add(node.unique_id)
return _sort_values(forward_edges), _sort_values(backward_edges)
# Build a map of children of macros and generic tests
def build_macro_edges(nodes: List[Any]):
forward_edges: Dict[str, List[str]] = {
n.unique_id: [] for n in nodes if n.unique_id.startswith("macro") or n.depends_on_macros
forward_edges: Dict[str, Set[str]] = {
n.unique_id: set() for n in nodes if n.unique_id.startswith("macro") or n.depends_on_macros
}
for node in nodes:
for unique_id in node.depends_on_macros:
if unique_id in forward_edges.keys():
forward_edges[unique_id].append(node.unique_id)
forward_edges[unique_id].add(node.unique_id)
return _sort_values(forward_edges)

View File

@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
import hashlib
from mashumaro.types import SerializableType
from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Literal
from typing import Optional, Union, List, Dict, Any, Sequence, Tuple, Iterator, Literal, Set
from dbt import deprecations
from dbt_common.contracts.constraints import (
@@ -244,12 +244,7 @@ class HasRelationMetadata(dbtClassMixin, Replaceable):
class MacroDependsOn(dbtClassMixin, Replaceable):
"""Used only in the Macro class"""
macros: List[str] = field(default_factory=list)
# 'in' on lists is O(n) so this is O(n^2) for # of macros
def add_macro(self, value: str):
if value not in self.macros:
self.macros.append(value)
macros: Set[str] = field(default_factory=set)
@dataclass
@@ -264,11 +259,7 @@ class DeferRelation(HasRelationMetadata):
@dataclass
class DependsOn(MacroDependsOn):
nodes: List[str] = field(default_factory=list)
def add_node(self, value: str):
if value not in self.nodes:
self.nodes.append(value)
nodes: Set[str] = field(default_factory=set)
@dataclass
@@ -530,11 +521,11 @@ class CompiledNode(ParsedNode):
return dct
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@@ -594,7 +585,7 @@ class ModelNode(CompiledNode):
original_file_path="",
path="",
unrendered_config=unrendered_config,
depends_on=DependsOn(nodes=args.depends_on_nodes),
depends_on=DependsOn(nodes=set(args.depends_on_nodes)),
config=ModelConfig(enabled=args.enabled),
)
@@ -944,11 +935,11 @@ Error raised for '{self.unique_id}', which has these hooks defined: \n{hook_list
return self.same_seeds(other)
@property
def depends_on_nodes(self):
return []
def depends_on_nodes(self) -> Set[str]:
return set()
@property
def depends_on_macros(self) -> List[str]:
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@property
@@ -1086,7 +1077,7 @@ class UnitTestDefinition(NodeInfoMixin, GraphNode, UnitTestDefinitionMandatory):
return self.original_file_path
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1169,7 +1160,7 @@ class Macro(BaseNode):
return self.macro_sql == other.macro_sql
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
@@ -1391,8 +1382,8 @@ class SourceDefinition(NodeInfoMixin, ParsedSourceMandatory):
return False
@property
def depends_on_nodes(self):
return []
def depends_on_nodes(self) -> Set[str]:
return set()
@property
def depends_on(self):
@@ -1444,7 +1435,7 @@ class Exposure(GraphNode):
created_at: float = field(default_factory=lambda: time.time())
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1594,7 +1585,7 @@ class Metric(GraphNode):
group: Optional[str] = None
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
@@ -1749,11 +1740,11 @@ class SemanticModel(GraphNode):
return SemanticModelReference(semantic_model_name=self.name)
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
@property
def depends_on_macros(self):
def depends_on_macros(self) -> Set[str]:
return self.depends_on.macros
def checked_agg_time_dimension_for_measure(
@@ -1867,7 +1858,7 @@ class SavedQuery(NodeInfoMixin, SavedQueryMandatory):
return self.query_params.metrics
@property
def depends_on_nodes(self):
def depends_on_nodes(self) -> Set[str]:
return self.depends_on.nodes
def same_metrics(self, old: "SavedQuery") -> bool:

View File

@@ -744,7 +744,7 @@ class ManifestLoader:
package_name, macro_name = macro_name.split(".")
dep_macro_id = self.macro_resolver.get_macro_id(package_name, macro_name)
if dep_macro_id:
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
macro.depends_on.macros.add(dep_macro_id) # will check for dupes
def write_manifest_for_partial_parse(self):
path = os.path.join(self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME)
@@ -1532,7 +1532,7 @@ def _process_refs(
)
target_model_id = target_model.unique_id
node.depends_on.add_node(target_model_id)
node.depends_on.nodes.add(target_model_id)
def _process_metric_depends_on(
@@ -1560,7 +1560,7 @@ def _process_metric_depends_on(
node=metric,
)
metric.depends_on.add_node(target_semantic_model.unique_id)
metric.depends_on.nodes.add(target_semantic_model.unique_id)
def _process_metric_node(
@@ -1628,7 +1628,7 @@ def _process_metric_node(
manifest=manifest, current_project=current_project, metric=target_metric
)
metric.type_params.input_measures.extend(target_metric.type_params.input_measures)
metric.depends_on.add_node(target_metric.unique_id)
metric.depends_on.nodes.add(target_metric.unique_id)
else:
assert_values_exhausted(metric.type)
@@ -1684,7 +1684,7 @@ def _process_metrics_for_node(
target_metric_id = target_metric.unique_id
node.depends_on.add_node(target_metric_id)
node.depends_on.nodes.add(target_metric_id)
def remove_dependent_project_references(manifest, external_node_unique_id):
@@ -1715,7 +1715,7 @@ def _process_sources_for_exposure(manifest: Manifest, current_project: str, expo
)
continue
target_source_id = target_source.unique_id
exposure.depends_on.add_node(target_source_id)
exposure.depends_on.nodes.add(target_source_id)
def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
@@ -1737,7 +1737,7 @@ def _process_sources_for_metric(manifest: Manifest, current_project: str, metric
)
continue
target_source_id = target_source.unique_id
metric.depends_on.add_node(target_source_id)
metric.depends_on.nodes.add(target_source_id)
def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
@@ -1764,7 +1764,7 @@ def _process_sources_for_node(manifest: Manifest, current_project: str, node: Ma
)
continue
target_source_id = target_source.unique_id
node.depends_on.add_node(target_source_id)
node.depends_on.nodes.add(target_source_id)
# This is called in task.rpc.sql_commands when a "dynamic" node is

View File

@@ -273,7 +273,7 @@ class SchemaGenericTestParser(SimpleParser):
)
# Add the depends_on here so we can limit the macros added
# to the context in rendering processing
node.depends_on.add_macro(macro_unique_id)
node.depends_on.macros.add(macro_unique_id)
if macro_unique_id in ["macro.dbt.test_not_null", "macro.dbt.test_unique"]:
config_call_dict = builder.get_static_config()
config._config_call_dict = config_call_dict