mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-17 19:31:34 +00:00
Add primary_key to manifest (#10096)
This commit is contained in:
6
.changes/unreleased/Features-20240506-175642.yaml
Normal file
6
.changes/unreleased/Features-20240506-175642.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
kind: Features
|
||||
body: serialize inferred primary key
|
||||
time: 2024-05-06T17:56:42.757673-05:00
|
||||
custom:
|
||||
Author: dave-connors-3
|
||||
Issue: "9824"
|
||||
@@ -39,6 +39,15 @@ Freely make incremental, non-breaking changes in-place to the latest major versi
|
||||
|
||||
These types of minor, non-breaking changes are tested by [tests/unit/artifacts/test_base_resource.py::TestMinorSchemaChange](https://github.com/dbt-labs/dbt-core/blob/main/tests/unit/artifacts/test_base_resource.py).
|
||||
|
||||
|
||||
#### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
|
||||
Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
|
||||
1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
|
||||
2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
|
||||
3. Confirm that the `Artifact Schema Check` CI check passes on the `dbt-core` PR that updates the artifact schemas, then merge the `dbt-core` PR.
|
||||
|
||||
Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, it should continue to work once the schemas are updated, because the updated schemas are forward-compatible and can therefore be used to validate previous minor versions of the schema.
|
||||
|
||||
### Breaking changes
|
||||
A breaking change is anything that:
|
||||
* Deletes a required field
|
||||
|
||||
@@ -31,6 +31,7 @@ class Model(CompiledResource):
|
||||
latest_version: Optional[NodeVersion] = None
|
||||
deprecation_date: Optional[datetime] = None
|
||||
defer_relation: Optional[DeferRelation] = None
|
||||
primary_key: List[str] = field(default_factory=list)
|
||||
|
||||
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
|
||||
dct = super().__post_serialize__(dct, context)
|
||||
|
||||
@@ -50,6 +50,7 @@ from dbt.contracts.graph.manifest import (
|
||||
)
|
||||
from dbt.contracts.graph.nodes import (
|
||||
Exposure,
|
||||
GenericTestNode,
|
||||
Macro,
|
||||
ManifestNode,
|
||||
Metric,
|
||||
@@ -466,6 +467,7 @@ class ManifestLoader:
|
||||
self.process_docs(self.root_project)
|
||||
self.process_metrics(self.root_project)
|
||||
self.process_saved_queries(self.root_project)
|
||||
self.process_model_inferred_primary_keys()
|
||||
self.check_valid_group_config()
|
||||
self.check_valid_access_property()
|
||||
|
||||
@@ -1149,6 +1151,15 @@ class ManifestLoader:
|
||||
# 2. process `group_by` of SavedQuery for `depends_on`
|
||||
_process_metrics_for_node(self.manifest, current_project, saved_query)
|
||||
|
||||
def process_model_inferred_primary_keys(self):
    """Processes Model nodes to populate their `primary_key`.

    Generic tests (enabled and disabled) are first grouped by the unique id
    of the node they are attached to, in a single pass over the manifest.
    Each model then looks its tests up directly instead of triggering a full
    rescan of `manifest.nodes` and `manifest.disabled` per model.
    """
    tests_by_model = {}

    # Enabled tests are collected before disabled ones so every model sees
    # its tests in the same order as a per-model scan of `nodes` followed by
    # `disabled` would produce.
    for node in self.manifest.nodes.values():
        if isinstance(node, GenericTestNode):
            tests_by_model.setdefault(node.attached_node, []).append(node)
    for disabled_nodes in self.manifest.disabled.values():
        for disabled_node in disabled_nodes:
            if isinstance(disabled_node, GenericTestNode):
                tests_by_model.setdefault(disabled_node.attached_node, []).append(disabled_node)

    for node in self.manifest.nodes.values():
        if not isinstance(node, ModelNode):
            continue
        generic_tests = tests_by_model.get(node.unique_id, [])
        primary_key = node.infer_primary_key(generic_tests)
        # Sort so the stored column list does not depend on the order in
        # which `infer_primary_key` happens to return the columns.
        node.primary_key = sorted(primary_key)
|
||||
|
||||
def update_semantic_model(self, semantic_model) -> None:
|
||||
# This has to be done at the end of parsing because the referenced model
|
||||
# might have alias/schema/database fields that are updated by yaml config.
|
||||
@@ -1344,6 +1355,24 @@ class ManifestLoader:
|
||||
write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))
|
||||
fire_event(ParsePerfInfoPath(path=path))
|
||||
|
||||
def _get_generic_tests_for_model(
    self,
    model: ModelNode,
) -> List[GenericTestNode]:
    """Collect the generic tests attached to `model`.

    Enabled tests (from `manifest.nodes`) come first, followed by disabled
    tests (from `manifest.disabled`).
    """

    def is_attached(candidate) -> bool:
        # A test belongs to the model when its `attached_node` is the model's id.
        return (
            isinstance(candidate, GenericTestNode)
            and candidate.attached_node == model.unique_id
        )

    enabled = [entry for entry in self.manifest.nodes.values() if is_attached(entry)]
    disabled = [
        entry
        for group in self.manifest.disabled.values()
        for entry in group
        if is_attached(entry)
    ]
    return enabled + disabled
|
||||
|
||||
|
||||
def invalid_target_fail_unless_test(
|
||||
node,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -282,6 +282,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"database": model_database,
|
||||
"alias": "model",
|
||||
"description": "The test model",
|
||||
"primary_key": ["id"],
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
@@ -374,6 +375,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
|
||||
"database": project.database,
|
||||
"alias": "second_model",
|
||||
"description": "The second test model",
|
||||
"primary_key": [],
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
@@ -924,6 +926,7 @@ def expected_references_manifest(project):
|
||||
},
|
||||
"deprecation_date": None,
|
||||
"description": "",
|
||||
"primary_key": [],
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "ephemeral_copy"],
|
||||
"group": None,
|
||||
@@ -989,6 +992,7 @@ def expected_references_manifest(project):
|
||||
},
|
||||
"deprecation_date": None,
|
||||
"description": "A summmary table of the ephemeral copy of the seed data",
|
||||
"primary_key": [],
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "ephemeral_summary"],
|
||||
"group": "test_group",
|
||||
@@ -1057,6 +1061,7 @@ def expected_references_manifest(project):
|
||||
},
|
||||
"deprecation_date": None,
|
||||
"description": "A view of the summary of the ephemeral copy of the seed data",
|
||||
"primary_key": [],
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "view_summary"],
|
||||
"group": None,
|
||||
@@ -1502,6 +1507,7 @@ def expected_versions_manifest(project):
|
||||
"sources": [],
|
||||
"depends_on": {"macros": [], "nodes": []},
|
||||
"description": "A versioned model",
|
||||
"primary_key": ["count", "first_name"],
|
||||
"deprecation_date": ANY,
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "versioned_model", "v1"],
|
||||
@@ -1572,6 +1578,7 @@ def expected_versions_manifest(project):
|
||||
"sources": [],
|
||||
"depends_on": {"macros": [], "nodes": []},
|
||||
"description": "A versioned model",
|
||||
"primary_key": ["first_name"],
|
||||
"deprecation_date": None,
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "versioned_model", "v2"],
|
||||
@@ -1625,6 +1632,7 @@ def expected_versions_manifest(project):
|
||||
},
|
||||
"deprecation_date": None,
|
||||
"description": "",
|
||||
"primary_key": [],
|
||||
"docs": {"node_color": None, "show": True},
|
||||
"fqn": ["test", "ref_versioned_model"],
|
||||
"group": None,
|
||||
|
||||
88
tests/functional/primary_keys/fixtures.py
Normal file
88
tests/functional/primary_keys/fixtures.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# Model SQL shared by every primary-key test: one row with an `id` and a `color` column.
simple_model_sql = """
select 1 as id, 'blue' as color
"""

# `id` carries only a `unique` test.
simple_model_unique_test = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique
"""

# `id` carries a `unique` test that is disabled; the accompanying test still
# expects `id` to be reported as the primary key.
simple_model_disabled_unique_test = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique:
              enabled: false

"""

# `id` carries both `unique` and `not_null` tests.
simple_model_unique_not_null_tests = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique
          - not_null
"""

# Model-level `dbt_utils.unique_combination_of_columns` test over (id, color).
simple_model_unique_combo_of_columns = """
models:
  - name: simple_model
    tests:
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns: [id, color]
"""

# Enforced contract with an explicit `primary_key` constraint on `id`.
simple_model_constraints = """
models:
  - name: simple_model
    config:
      contract:
        enforced: true
    columns:
      - name: id
        data_type: int
        constraints:
          - type: not_null
          - type: primary_key
      - name: color
        data_type: text
"""

# Two versions that both inherit the top-level `id` column and its tests.
simple_model_two_versions_both_configured = """
models:
  - name: simple_model
    latest_version: 1
    columns:
      - name: id
        tests:
          - unique
          - not_null
    versions:
      - v: 1
      - v: 2
"""

# Version 2 excludes the `id` column, so it does not inherit the tests on `id`.
simple_model_two_versions_exclude_col = """
models:
  - name: simple_model
    latest_version: 1
    columns:
      - name: id
        tests:
          - unique
          - not_null
    versions:
      - v: 1
      - v: 2
        columns:
          - include: all
            exclude: [id]
"""
|
||||
157
tests/functional/primary_keys/test_primary_keys.py
Normal file
157
tests/functional/primary_keys/test_primary_keys.py
Normal file
@@ -0,0 +1,157 @@
|
||||
import pytest
|
||||
|
||||
from dbt.tests.util import get_manifest, run_dbt
|
||||
from tests.functional.primary_keys.fixtures import (
|
||||
simple_model_constraints,
|
||||
simple_model_disabled_unique_test,
|
||||
simple_model_sql,
|
||||
simple_model_two_versions_both_configured,
|
||||
simple_model_two_versions_exclude_col,
|
||||
simple_model_unique_combo_of_columns,
|
||||
simple_model_unique_not_null_tests,
|
||||
simple_model_unique_test,
|
||||
)
|
||||
|
||||
|
||||
class TestSimpleModelNoYml:
    """A model without any schema file ends up with an empty `primary_key`."""

    @pytest.fixture(scope="class")
    def models(self):
        return {"simple_model.sql": simple_model_sql}

    def test_simple_model_no_yml(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == []
|
||||
|
||||
|
||||
class TestSimpleModelConstraints:
    """A `primary_key` column constraint on `id` yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_constraints
        return project_files

    def test_simple_model_constraints(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == ["id"]
|
||||
|
||||
|
||||
class TestSimpleModelUniqueNotNullTests:
    """`unique` plus `not_null` tests on `id` yield `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_unique_not_null_tests
        return project_files

    def test_simple_model_unique_not_null_tests(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == ["id"]
|
||||
|
||||
|
||||
class TestSimpleModelUniqueTests:
    """A lone `unique` test on `id` yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_unique_test
        return project_files

    def test_simple_model_unique_test(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == ["id"]
|
||||
|
||||
|
||||
class TestSimpleModelDisabledUniqueTests:
    """A disabled `unique` test on `id` still yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_disabled_unique_test
        return project_files

    def test_simple_model_disabled_unique_test(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == ["id"]
|
||||
|
||||
|
||||
class TestVersionedSimpleModel:
    """Both versions inherit the tests on `id`, so both report `["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model_v1.sql"] = simple_model_sql
        project_files["simple_model_v2.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_two_versions_both_configured
        return project_files

    def test_versioned_simple_model(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_nodes = get_manifest(project.project_root).nodes
        first_version = parsed_nodes["model.test.simple_model.v1"]
        second_version = parsed_nodes["model.test.simple_model.v2"]
        assert first_version.primary_key == ["id"]
        assert second_version.primary_key == ["id"]
|
||||
|
||||
|
||||
class TestVersionedSimpleModelExcludeTests:
    """v1 keeps `id` and reports `["id"]`; v2 excludes `id` and reports `[]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model_v1.sql"] = simple_model_sql
        project_files["simple_model_v2.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_two_versions_exclude_col
        return project_files

    def test_versioned_simple_model_exclude_col(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_nodes = get_manifest(project.project_root).nodes
        first_version = parsed_nodes["model.test.simple_model.v1"]
        second_version = parsed_nodes["model.test.simple_model.v2"]
        assert first_version.primary_key == ["id"]
        assert second_version.primary_key == []
|
||||
|
||||
|
||||
class TestSimpleModelCombinationOfColumns:
    """A `dbt_utils.unique_combination_of_columns` test yields a sorted composite key."""

    @pytest.fixture(scope="class")
    def packages(self):
        # dbt-utils supplies the `unique_combination_of_columns` generic test.
        dbt_utils_package = {
            "git": "https://github.com/dbt-labs/dbt-utils.git",
            "revision": "1.1.0",
        }
        return {"packages": [dbt_utils_package]}

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {}
        project_files["simple_model.sql"] = simple_model_sql
        project_files["schema.yml"] = simple_model_unique_combo_of_columns
        return project_files

    def test_versioned_simple_combo_of_columns(self, project):
        for command in ("deps", "run"):
            run_dbt([command])
        parsed_model = get_manifest(project.project_root).nodes["model.test.simple_model"]
        assert parsed_model.primary_key == ["color", "id"]
|
||||
@@ -74,6 +74,7 @@ REQUIRED_PARSED_NODE_KEYS = frozenset(
|
||||
"raw_code",
|
||||
"language",
|
||||
"description",
|
||||
"primary_key",
|
||||
"columns",
|
||||
"fqn",
|
||||
"build_path",
|
||||
|
||||
@@ -26,6 +26,7 @@ def model_node():
|
||||
metrics=[],
|
||||
depends_on=DependsOn(),
|
||||
description="",
|
||||
primary_key=[],
|
||||
database="test_db",
|
||||
schema="test_schema",
|
||||
alias="bar",
|
||||
|
||||
@@ -149,6 +149,7 @@ def basic_compiled_dict():
|
||||
"depends_on": {"macros": [], "nodes": []},
|
||||
"database": "test_db",
|
||||
"description": "",
|
||||
"primary_key": [],
|
||||
"schema": "test_schema",
|
||||
"alias": "bar",
|
||||
"tags": [],
|
||||
|
||||
@@ -167,6 +167,7 @@ def base_parsed_model_dict():
|
||||
"depends_on": {"macros": [], "nodes": []},
|
||||
"database": "test_db",
|
||||
"description": "",
|
||||
"primary_key": [],
|
||||
"schema": "test_schema",
|
||||
"alias": "bar",
|
||||
"tags": [],
|
||||
@@ -220,6 +221,7 @@ def basic_parsed_model_object():
|
||||
metrics=[],
|
||||
depends_on=DependsOn(),
|
||||
description="",
|
||||
primary_key=[],
|
||||
database="test_db",
|
||||
schema="test_schema",
|
||||
alias="bar",
|
||||
@@ -274,6 +276,7 @@ def complex_parsed_model_dict():
|
||||
"depends_on": {"macros": [], "nodes": ["model.test.bar"]},
|
||||
"database": "test_db",
|
||||
"description": "My parsed node",
|
||||
"primary_key": [],
|
||||
"schema": "test_schema",
|
||||
"alias": "bar",
|
||||
"tags": ["tag"],
|
||||
|
||||
Reference in New Issue
Block a user