Play with sets

2025-12-20 21:41:28 +00:00 · 2023-01-06 13:13:38 +01:00
3 changed files with 20 additions and 16 deletions
--- a/core/dbt/contracts/graph/manifest.py
+++ b/core/dbt/contracts/graph/manifest.py
@@ -328,7 +328,7 @@ def build_node_edges(nodes: List[ManifestNode]):
    # pre-populate the forward edge dict for simplicity
    forward_edges: Dict[str, List[str]] = {n.unique_id: [] for n in nodes}
    for node in nodes:
-        backward_edges[node.unique_id] = node.depends_on_nodes[:]
+        backward_edges[node.unique_id] = list(node.depends_on_nodes)
        for unique_id in node.depends_on_nodes:
            if unique_id in forward_edges.keys():
                forward_edges[unique_id].append(node.unique_id)
--- a/core/dbt/contracts/graph/nodes.py
+++ b/core/dbt/contracts/graph/nodes.py
@@ -11,6 +11,7 @@ from typing import (
    Sequence,
    Tuple,
    Iterator,
+    Set,
 )

 from dbt.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
@@ -170,21 +171,24 @@ class HasRelationMetadata(dbtClassMixin, Replaceable):
 class MacroDependsOn(dbtClassMixin, Replaceable):
    """Used only in the Macro class"""

-    macros: List[str] = field(default_factory=list)
+    # Experimental: a set would de-duplicate macro names without a linear scan,
+    # but dbtClassMixin (via hologram) doesn't currently support Set as a
+    # JSON-serializable type, so this annotation currently fails with:
+    #     Parsing Error
+    #       at path []: Unable to create schema for 'Set'
+    macros: Set[str] = field(default_factory=lambda: set())

-    # 'in' on lists is O(n) so this is O(n^2) for # of macros
+    # set.add() is O(1) on average and ignores duplicates: https://wiki.python.org/moin/TimeComplexity
    def add_macro(self, value: str):
-        if value not in self.macros:
-            self.macros.append(value)
+        self.macros.add(value)


@dataclass
 class DependsOn(MacroDependsOn):
-    nodes: List[str] = field(default_factory=list)
+    nodes: Set[str] = field(default_factory=lambda: set())

    def add_node(self, value: str):
-        if value not in self.nodes:
-            self.nodes.append(value)
+        self.nodes.add(value)


@dataclass
--- a/core/dbt/parser/manifest.py
+++ b/core/dbt/parser/manifest.py
@@ -950,7 +950,7 @@ class ManifestLoader:
                        if not self.manifest.disabled[node.unique_id]:
                            self.manifest.disabled.pop(node.unique_id)

-                    self.manifest.add_node_nofile(node)
+                    self.manifest.add_nofile(node)

        self.manifest.rebuild_ref_lookup()

@@ -1138,7 +1138,7 @@ def _process_refs_for_exposure(manifest: Manifest, current_project: str, exposur

        target_model_id = target_model.unique_id

-        exposure.depends_on.nodes.append(target_model_id)
+        exposure.depends_on.nodes.add(target_model_id)
        manifest.update_exposure(exposure)


@@ -1180,7 +1180,7 @@ def _process_refs_for_metric(manifest: Manifest, current_project: str, metric: M

        target_model_id = target_model.unique_id

-        metric.depends_on.nodes.append(target_model_id)
+        metric.depends_on.nodes.add(target_model_id)
        manifest.update_metric(metric)


@@ -1230,7 +1230,7 @@ def _process_metrics_for_node(

        target_metric_id = target_metric.unique_id

-        node.depends_on.nodes.append(target_metric_id)
+        node.depends_on.nodes.add(target_metric_id)


 def _process_refs_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
@@ -1275,7 +1275,7 @@ def _process_refs_for_node(manifest: Manifest, current_project: str, node: Manif

        target_model_id = target_model.unique_id

-        node.depends_on.nodes.append(target_model_id)
+        node.depends_on.nodes.add(target_model_id)
        # TODO: I think this is extraneous, node should already be the same
        # as manifest.nodes[node.unique_id] (we're mutating node here, not
        # making a new one)
@@ -1302,7 +1302,7 @@ def _process_sources_for_exposure(manifest: Manifest, current_project: str, expo
            )
            continue
        target_source_id = target_source.unique_id
-        exposure.depends_on.nodes.append(target_source_id)
+        exposure.depends_on.nodes.add(target_source_id)
        manifest.update_exposure(exposure)


@@ -1325,7 +1325,7 @@ def _process_sources_for_metric(manifest: Manifest, current_project: str, metric
            )
            continue
        target_source_id = target_source.unique_id
-        metric.depends_on.nodes.append(target_source_id)
+        metric.depends_on.nodes.add(target_source_id)
        manifest.update_metric(metric)


@@ -1354,7 +1354,7 @@ def _process_sources_for_node(manifest: Manifest, current_project: str, node: Ma
            )
            continue
        target_source_id = target_source.unique_id
-        node.depends_on.nodes.append(target_source_id)
+        node.depends_on.nodes.add(target_source_id)
        manifest.update_node(node)