Support unit testing models that depend on sources with the same name (#12220)

2025-12-17 19:31:34 +00:00 · 2025-11-28 14:32:15 -05:00
parent 518c360a29
commit 2c7f49a71e
6 changed files with 98 additions and 2 deletions
--- a/.changes/unreleased/Fixes-20251127-170124.yaml
+++ b/.changes/unreleased/Fixes-20251127-170124.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: ':bug: :snowman: Support unit testing models that depend on sources with the same name'
+time: 2025-11-27T17:01:24.193516-05:00
+custom:
+    Author: michelleark
+    Issue: 11975 10433
--- a/core/dbt/compilation.py
+++ b/core/dbt/compilation.py
@@ -26,6 +26,7 @@ from dbt.contracts.graph.nodes import (
    SeedNode,
    UnitTestDefinition,
    UnitTestNode,
+    UnitTestSourceDefinition,
 )
 from dbt.events.types import FoundStats, WritingInjectedSQLForNode
 from dbt.exceptions import (
@@ -566,7 +567,12 @@ class Compiler:

            _extend_prepended_ctes(prepended_ctes, new_prepended_ctes)

-            new_cte_name = self.add_ephemeral_prefix(cte_model.identifier)
+            cte_name = (
+                cte_model.cte_name
+                if isinstance(cte_model, UnitTestSourceDefinition)
+                else cte_model.identifier
+            )
+            new_cte_name = self.add_ephemeral_prefix(cte_name)
            rendered_sql = cte_model._pre_injected_sql or cte_model.compiled_code
            sql = f" {new_cte_name} as (\n{rendered_sql}\n)"

--- a/core/dbt/context/providers.py
+++ b/core/dbt/context/providers.py
@@ -854,7 +854,12 @@ class RuntimeUnitTestSourceResolver(BaseSourceResolver):
        # we just need to set_cte, but skipping it confuses typing. We *do* need
        # the relation in the "this" property.
        self.model.set_cte(target_source.unique_id, None)
-        return self.Relation.create_ephemeral_from(target_source)
+
+        identifier = self.Relation.add_ephemeral_prefix(target_source.cte_name)
+        return self.Relation.create(
+            type=self.Relation.CTE,
+            identifier=identifier,
+        ).quote(identifier=False)


 # metric` implementations
--- a/core/dbt/contracts/graph/nodes.py
+++ b/core/dbt/contracts/graph/nodes.py
@@ -1098,6 +1098,10 @@ class UnitTestSourceDefinition(ModelNode):
    source_name: str = "undefined"
    quoting: QuotingResource = field(default_factory=QuotingResource)

+    @property
+    def cte_name(self):  # last dot-separated segment of unique_id; compilation uses it (instead of identifier) to name this input's CTE
+        return self.unique_id.split(".")[-1]
+
    @property
    def search_name(self):
        return f"{self.source_name}.{self.name}"
--- a/core/dbt/parser/unit_tests.py
+++ b/core/dbt/parser/unit_tests.py
@@ -168,6 +168,10 @@ class UnitTestManifestLoader:
                    **common_fields,
                    source_name=original_input_node.source_name,  # needed for source lookup
                )
+                # When multiple sources define a table with the same name, we add the source name to the unique id.
+                # This additionally prevents duplicate CTE names during compilation.
+                input_node.unique_id = f"model.{original_input_node.package_name}.{original_input_node.source_name}__{input_name}"
+
                # Sources need to go in the sources dictionary in order to create the right lookup
                self.unit_test_manifest.sources[input_node.unique_id] = input_node  # type: ignore

--- a/tests/functional/unit_testing/test_ut_sources.py
+++ b/tests/functional/unit_testing/test_ut_sources.py
@@ -1,3 +1,5 @@
+from copy import deepcopy
+
 import pytest

 from dbt.contracts.results import RunStatus, TestStatus
@@ -60,6 +62,40 @@ failing_test_schema_yml = """
 """


+schema_duplicate_source_names_yml = """
+sources:
+  - name: seed_sources
+    schema: "{{ target.schema }}"
+    tables:
+      - name: raw_customers
+  - name: seed_sources_2
+    schema: "{{ target.schema }}_other"
+    tables:
+      - name: raw_customers
+
+unit_tests:
+  - name: test_customers
+    model: customers_duplicate_source_names
+    given:
+      - input: source('seed_sources', 'raw_customers')
+        rows:
+          - {id: 1, first_name: Emily}
+      - input: source('seed_sources_2', 'raw_customers')
+        rows:
+          - {id: 2, first_name: Michelle}
+    expect:
+      rows:
+        - {id: 1, first_name: Emily}
+        - {id: 2, first_name: Michelle}
+"""
+
+customers_duplicate_source_names_sql = """
+select * from {{ source('seed_sources', 'raw_customers') }}
+union all
+select * from {{ source('seed_sources_2', 'raw_customers') }}
+"""
+
+
 class TestUnitTestSourceInput:
    @pytest.fixture(scope="class")
    def seeds(self):
@@ -102,3 +138,38 @@ class TestUnitTestSourceInput:
            elif result.node.unique_id == "unit_test.test.customers.fail_test_customers":
                assert result.status == TestStatus.Fail
        assert len(results) == 6
+
+
+class TestUnitTestSourceInputSameNames:  # unit test for a model reading two sources that both declare a raw_customers table
+    @pytest.fixture(scope="class")
+    def other_schema(self, unique_schema):  # name of the second schema: unique_schema with an "_other" suffix
+        return unique_schema + "_other"
+
+    @pytest.fixture(scope="class")
+    def profiles_config_update(self, dbt_profile_target, unique_schema, other_schema):
+        outputs = {"default": dbt_profile_target, "otherschema": deepcopy(dbt_profile_target)}  # deepcopy so each target gets its own schema value
+        outputs["default"]["schema"] = unique_schema
+        outputs["otherschema"]["schema"] = other_schema
+        return {"test": {"outputs": outputs, "target": "default"}}
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "raw_customers.csv": raw_customers_csv,
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "customers_duplicate_source_names.sql": customers_duplicate_source_names_sql,
+            "sources.yml": schema_duplicate_source_names_yml,
+        }
+
+    def test_source_input_same_names(self, project, other_schema):
+        results = run_dbt(["seed"])  # seed raw_customers with the default target
+
+        project.create_test_schema(schema_name=other_schema)
+        results = run_dbt(["seed", "--target", "otherschema"])  # seed the same CSV again with the "otherschema" target
+
+        results = run_dbt(["test", "--select", "test_type:unit"])
+        assert len(results) == 1  # sources.yml defines a single unit test (test_customers)