Support unit testing models that depend on source with the same name (#12220)

This commit is contained in:
Michelle Ark
2025-11-28 14:32:15 -05:00
committed by GitHub
parent 518c360a29
commit 2c7f49a71e
6 changed files with 98 additions and 2 deletions

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: ':bug: :snowman: Support unit testing models that depend on sources with the same name'
time: 2025-11-27T17:01:24.193516-05:00
custom:
Author: michelleark
Issue: 11975 10433

View File

@@ -26,6 +26,7 @@ from dbt.contracts.graph.nodes import (
SeedNode,
UnitTestDefinition,
UnitTestNode,
UnitTestSourceDefinition,
)
from dbt.events.types import FoundStats, WritingInjectedSQLForNode
from dbt.exceptions import (
@@ -566,7 +567,12 @@ class Compiler:
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
new_cte_name = self.add_ephemeral_prefix(cte_model.identifier)
cte_name = (
cte_model.cte_name
if isinstance(cte_model, UnitTestSourceDefinition)
else cte_model.identifier
)
new_cte_name = self.add_ephemeral_prefix(cte_name)
rendered_sql = cte_model._pre_injected_sql or cte_model.compiled_code
sql = f" {new_cte_name} as (\n{rendered_sql}\n)"

View File

@@ -854,7 +854,12 @@ class RuntimeUnitTestSourceResolver(BaseSourceResolver):
# we just need to set_cte, but skipping it confuses typing. We *do* need
# the relation in the "this" property.
self.model.set_cte(target_source.unique_id, None)
return self.Relation.create_ephemeral_from(target_source)
identifier = self.Relation.add_ephemeral_prefix(target_source.cte_name)
return self.Relation.create(
type=self.Relation.CTE,
identifier=identifier,
).quote(identifier=False)
# `metric` implementations

View File

@@ -1098,6 +1098,10 @@ class UnitTestSourceDefinition(ModelNode):
source_name: str = "undefined"
quoting: QuotingResource = field(default_factory=QuotingResource)
@property
def cte_name(self):
    """Return the last dot-separated segment of ``unique_id``.

    If ``unique_id`` contains no dot, the whole id is returned.
    """
    # rpartition yields (head, sep, tail); the tail is everything after
    # the final "." (or the entire string when no "." is present).
    _head, _sep, tail = self.unique_id.rpartition(".")
    return tail
@property
def search_name(self):
    """Return ``source_name`` and ``name`` joined by a single dot."""
    qualified_name = f"{self.source_name}.{self.name}"
    return qualified_name

View File

@@ -168,6 +168,10 @@ class UnitTestManifestLoader:
**common_fields,
source_name=original_input_node.source_name, # needed for source lookup
)
# Tables in different sources can share the same name, so we include the source name in the unique id.
# This also prevents duplicate CTE names during compilation.
input_node.unique_id = f"model.{original_input_node.package_name}.{original_input_node.source_name}__{input_name}"
# Sources need to go in the sources dictionary in order to create the right lookup
self.unit_test_manifest.sources[input_node.unique_id] = input_node # type: ignore

View File

@@ -1,3 +1,5 @@
from copy import deepcopy
import pytest
from dbt.contracts.results import RunStatus, TestStatus
@@ -60,6 +62,40 @@ failing_test_schema_yml = """
"""
schema_duplicate_source_names_yml = """
sources:
- name: seed_sources
schema: "{{ target.schema }}"
tables:
- name: raw_customers
- name: seed_sources_2
schema: "{{ target.schema }}_other"
tables:
- name: raw_customers
unit_tests:
- name: test_customers
model: customers_duplicate_source_names
given:
- input: source('seed_sources', 'raw_customers')
rows:
- {id: 1, first_name: Emily}
- input: source('seed_sources_2', 'raw_customers')
rows:
- {id: 2, first_name: Michelle}
expect:
rows:
- {id: 1, first_name: Emily}
- {id: 2, first_name: Michelle}
"""
customers_duplicate_source_names_sql = """
select * from {{ source('seed_sources', 'raw_customers') }}
union all
select * from {{ source('seed_sources_2', 'raw_customers') }}
"""
class TestUnitTestSourceInput:
@pytest.fixture(scope="class")
def seeds(self):
@@ -102,3 +138,38 @@ class TestUnitTestSourceInput:
elif result.node.unique_id == "unit_test.test.customers.fail_test_customers":
assert result.status == TestStatus.Fail
assert len(results) == 6
class TestUnitTestSourceInputSameNames:
    """Unit test a model that reads a same-named table from two different sources."""

    @pytest.fixture(scope="class")
    def other_schema(self, unique_schema):
        """Name of the second schema: the unique schema with an ``_other`` suffix."""
        return unique_schema + "_other"

    @pytest.fixture(scope="class")
    def profiles_config_update(self, dbt_profile_target, unique_schema, other_schema):
        """Build a profile with a ``default`` target and an ``otherschema`` target."""
        # Copy first so the second target starts from the unmodified settings;
        # the original target dict is still updated in place, as before.
        other_target = deepcopy(dbt_profile_target)
        other_target["schema"] = other_schema
        dbt_profile_target["schema"] = unique_schema
        return {
            "test": {
                "outputs": {"default": dbt_profile_target, "otherschema": other_target},
                "target": "default",
            }
        }

    @pytest.fixture(scope="class")
    def seeds(self):
        """Single seed file that is loaded into both schemas by the test."""
        return {"raw_customers.csv": raw_customers_csv}

    @pytest.fixture(scope="class")
    def models(self):
        """Model selecting from both sources, plus the source/unit-test YAML."""
        return {
            "customers_duplicate_source_names.sql": customers_duplicate_source_names_sql,
            "sources.yml": schema_duplicate_source_names_yml,
        }

    def test_source_input_same_names(self, project, other_schema):
        """Seed both schemas, then run the unit tests and expect exactly one result."""
        # Seed into the default target's schema.
        run_dbt(["seed"])

        # Create the second schema and seed the same file into it.
        project.create_test_schema(schema_name=other_schema)
        run_dbt(["seed", "--target", "otherschema"])

        unit_test_results = run_dbt(["test", "--select", "test_type:unit"])
        assert len(unit_test_results) == 1