Add primary_key to manifest (#10096)

This commit is contained in:
dave-connors-3
2024-05-10 14:14:20 -05:00
committed by GitHub
parent ecf9436c6e
commit 8fe7d652ab
12 changed files with 3871 additions and 4629 deletions

View File

@@ -0,0 +1,6 @@
kind: Features
body: serialize inferred primary key
time: 2024-05-06T17:56:42.757673-05:00
custom:
Author: dave-connors-3
Issue: "9824"

View File

@@ -39,6 +39,15 @@ Freely make incremental, non-breaking changes in-place to the latest major versi
These types of minor, non-breaking changes are tested by [tests/unit/artifacts/test_base_resource.py::TestMinorSchemaChange](https://github.com/dbt-labs/dbt-core/blob/main/tests/unit/artifacts/test_base_resource.py).
#### Updating [schemas.getdbt.com](https://schemas.getdbt.com)
Non-breaking changes to artifact schemas require an update to the corresponding jsonschemas published to [schemas.getdbt.com](https://schemas.getdbt.com), which are defined in https://github.com/dbt-labs/schemas.getdbt.com. To do so:
1. Create a PR in https://github.com/dbt-labs/schemas.getdbt.com which reflects the schema changes to the artifact. The schema can be updated in-place for non-breaking changes. Example PR: https://github.com/dbt-labs/schemas.getdbt.com/pull/39
2. Merge the https://github.com/dbt-labs/schemas.getdbt.com PR
3. Observe the `Artifact Schema Check` CI check pass on the `dbt-core` PR that updates the artifact schemas, and merge the `dbt-core` PR!
Note: Although `jsonschema` validation using the schemas in [schemas.getdbt.com](https://schemas.getdbt.com) is not encouraged or formally supported, `jsonschema` validation should continue to work once the schemas are updated, because they are forward-compatible and can therefore be used to validate previous minor versions of the schema.
### Breaking changes
A breaking change is anything that:
* Deletes a required field

View File

@@ -31,6 +31,7 @@ class Model(CompiledResource):
latest_version: Optional[NodeVersion] = None
deprecation_date: Optional[datetime] = None
defer_relation: Optional[DeferRelation] = None
primary_key: List[str] = field(default_factory=list)
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)

View File

@@ -50,6 +50,7 @@ from dbt.contracts.graph.manifest import (
)
from dbt.contracts.graph.nodes import (
Exposure,
GenericTestNode,
Macro,
ManifestNode,
Metric,
@@ -466,6 +467,7 @@ class ManifestLoader:
self.process_docs(self.root_project)
self.process_metrics(self.root_project)
self.process_saved_queries(self.root_project)
self.process_model_inferred_primary_keys()
self.check_valid_group_config()
self.check_valid_access_property()
@@ -1149,6 +1151,15 @@ class ManifestLoader:
# 2. process `group_by` of SavedQuery for `depends_on`
_process_metrics_for_node(self.manifest, current_project, saved_query)
def process_model_inferred_primary_keys(self):
    """Populate `primary_key` on every model node in the manifest.

    Generic tests, both enabled (`manifest.nodes`) and disabled
    (`manifest.disabled`), are grouped by the node they are attached to in
    a single pass. Each model then looks up its own tests directly, so the
    cost is linear in the size of the manifest instead of rescanning every
    node once per model.
    """
    tests_by_attached_node = {}

    def _collect(candidates):
        # Bucket every generic test under the unique_id it is attached to.
        for candidate in candidates:
            if isinstance(candidate, GenericTestNode):
                tests_by_attached_node.setdefault(candidate.attached_node, []).append(
                    candidate
                )

    # Enabled tests first, then disabled ones, so each model receives its
    # tests in the same order a per-model scan would produce.
    _collect(self.manifest.nodes.values())
    for disabled_nodes in self.manifest.disabled.values():
        _collect(disabled_nodes)

    for node in self.manifest.nodes.values():
        if not isinstance(node, ModelNode):
            continue
        generic_tests = tests_by_attached_node.get(node.unique_id, [])
        # Sorted so the serialized key is stable regardless of test order.
        node.primary_key = sorted(node.infer_primary_key(generic_tests))
def update_semantic_model(self, semantic_model) -> None:
# This has to be done at the end of parsing because the referenced model
# might have alias/schema/database fields that are updated by yaml config.
@@ -1344,6 +1355,24 @@ class ManifestLoader:
write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))
fire_event(ParsePerfInfoPath(path=path))
def _get_generic_tests_for_model(
    self,
    model: ModelNode,
) -> List[GenericTestNode]:
    """Collect the generic tests attached to `model`.

    Enabled tests (from `manifest.nodes`) come first, followed by disabled
    tests (from `manifest.disabled`).
    """
    target_id = model.unique_id

    def _is_attached_generic_test(candidate) -> bool:
        # A match is a generic test whose `attached_node` is this model.
        return isinstance(candidate, GenericTestNode) and candidate.attached_node == target_id

    attached = [
        enabled_node
        for enabled_node in self.manifest.nodes.values()
        if _is_attached_generic_test(enabled_node)
    ]
    attached.extend(
        disabled_node
        for disabled_group in self.manifest.disabled.values()
        for disabled_node in disabled_group
        if _is_attached_generic_test(disabled_node)
    )
    return attached
def invalid_target_fail_unless_test(
node,

File diff suppressed because it is too large Load Diff

View File

@@ -282,6 +282,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"database": model_database,
"alias": "model",
"description": "The test model",
"primary_key": ["id"],
"columns": {
"id": {
"name": "id",
@@ -374,6 +375,7 @@ def expected_seeded_manifest(project, model_database=None, quote_model=False):
"database": project.database,
"alias": "second_model",
"description": "The second test model",
"primary_key": [],
"columns": {
"id": {
"name": "id",
@@ -924,6 +926,7 @@ def expected_references_manifest(project):
},
"deprecation_date": None,
"description": "",
"primary_key": [],
"docs": {"node_color": None, "show": True},
"fqn": ["test", "ephemeral_copy"],
"group": None,
@@ -989,6 +992,7 @@ def expected_references_manifest(project):
},
"deprecation_date": None,
"description": "A summmary table of the ephemeral copy of the seed data",
"primary_key": [],
"docs": {"node_color": None, "show": True},
"fqn": ["test", "ephemeral_summary"],
"group": "test_group",
@@ -1057,6 +1061,7 @@ def expected_references_manifest(project):
},
"deprecation_date": None,
"description": "A view of the summary of the ephemeral copy of the seed data",
"primary_key": [],
"docs": {"node_color": None, "show": True},
"fqn": ["test", "view_summary"],
"group": None,
@@ -1502,6 +1507,7 @@ def expected_versions_manifest(project):
"sources": [],
"depends_on": {"macros": [], "nodes": []},
"description": "A versioned model",
"primary_key": ["count", "first_name"],
"deprecation_date": ANY,
"docs": {"node_color": None, "show": True},
"fqn": ["test", "versioned_model", "v1"],
@@ -1572,6 +1578,7 @@ def expected_versions_manifest(project):
"sources": [],
"depends_on": {"macros": [], "nodes": []},
"description": "A versioned model",
"primary_key": ["first_name"],
"deprecation_date": None,
"docs": {"node_color": None, "show": True},
"fqn": ["test", "versioned_model", "v2"],
@@ -1625,6 +1632,7 @@ def expected_versions_manifest(project):
},
"deprecation_date": None,
"description": "",
"primary_key": [],
"docs": {"node_color": None, "show": True},
"fqn": ["test", "ref_versioned_model"],
"group": None,

View File

@@ -0,0 +1,88 @@
# Fixtures for the primary-key functional tests. The YAML documents rely on
# indentation to nest `columns`, `tests`, `config` and `versions` under the
# model entry, so the whitespace inside these literals is significant.

# Two-column model shared by every test case.
simple_model_sql = """
select 1 as id, 'blue' as color
"""

# `unique` test only, on `id`.
simple_model_unique_test = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique
"""

# Same `unique` test on `id`, but disabled.
simple_model_disabled_unique_test = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique:
              enabled: false
"""

# `unique` and `not_null` tests on `id`.
simple_model_unique_not_null_tests = """
models:
  - name: simple_model
    columns:
      - name: id
        tests:
          - unique
          - not_null
"""

# Model-level dbt_utils test over the (id, color) column pair.
simple_model_unique_combo_of_columns = """
models:
  - name: simple_model
    tests:
      - dbt_utils.unique_combination_of_columns:
          combination_of_columns: [id, color]
"""

# Enforced contract with a `primary_key` constraint on `id`.
simple_model_constraints = """
models:
  - name: simple_model
    config:
      contract:
        enforced: true
    columns:
      - name: id
        data_type: int
        constraints:
          - type: not_null
          - type: primary_key
      - name: color
        data_type: text
"""

# Two versions that both use the model-level column definitions.
simple_model_two_versions_both_configured = """
models:
  - name: simple_model
    latest_version: 1
    columns:
      - name: id
        tests:
          - unique
          - not_null
    versions:
      - v: 1
      - v: 2
"""

# Two versions; v2 excludes the `id` column (and with it the tests on `id`).
simple_model_two_versions_exclude_col = """
models:
  - name: simple_model
    latest_version: 1
    columns:
      - name: id
        tests:
          - unique
          - not_null
    versions:
      - v: 1
      - v: 2
        columns:
          - include: all
            exclude: [id]
"""

View File

@@ -0,0 +1,157 @@
import pytest
from dbt.tests.util import get_manifest, run_dbt
from tests.functional.primary_keys.fixtures import (
simple_model_constraints,
simple_model_disabled_unique_test,
simple_model_sql,
simple_model_two_versions_both_configured,
simple_model_two_versions_exclude_col,
simple_model_unique_combo_of_columns,
simple_model_unique_not_null_tests,
simple_model_unique_test,
)
class TestSimpleModelNoYml:
    """A model with no properties file ends up with an empty `primary_key`."""

    @pytest.fixture(scope="class")
    def models(self):
        # Only the SQL file is provided; there is no schema YAML.
        project_files = {"simple_model.sql": simple_model_sql}
        return project_files

    def test_simple_model_no_yml(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        assert parsed_nodes["model.test.simple_model"].primary_key == []
class TestSimpleModelConstraints:
    """A `primary_key` constraint on `id` yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model.sql": simple_model_sql,
            "schema.yml": simple_model_constraints,
        }
        return project_files

    def test_simple_model_constraints(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        assert parsed_nodes["model.test.simple_model"].primary_key == ["id"]
class TestSimpleModelUniqueNotNullTests:
    """`unique` + `not_null` tests on `id` yield `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model.sql": simple_model_sql,
            "schema.yml": simple_model_unique_not_null_tests,
        }
        return project_files

    def test_simple_model_unique_not_null_tests(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        assert parsed_nodes["model.test.simple_model"].primary_key == ["id"]
class TestSimpleModelUniqueTests:
    """A lone `unique` test on `id` yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model.sql": simple_model_sql,
            "schema.yml": simple_model_unique_test,
        }
        return project_files

    def test_simple_model_unique_test(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        assert parsed_nodes["model.test.simple_model"].primary_key == ["id"]
class TestSimpleModelDisabledUniqueTests:
    """A disabled `unique` test on `id` still yields `primary_key == ["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model.sql": simple_model_sql,
            "schema.yml": simple_model_disabled_unique_test,
        }
        return project_files

    def test_simple_model_disabled_unique_test(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        assert parsed_nodes["model.test.simple_model"].primary_key == ["id"]
class TestVersionedSimpleModel:
    """Both versions share the model-level tests, so both get `["id"]`."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model_v1.sql": simple_model_sql,
            "simple_model_v2.sql": simple_model_sql,
            "schema.yml": simple_model_two_versions_both_configured,
        }
        return project_files

    def test_versioned_simple_model(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        first_version = parsed_nodes["model.test.simple_model.v1"]
        second_version = parsed_nodes["model.test.simple_model.v2"]
        assert first_version.primary_key == ["id"]
        assert second_version.primary_key == ["id"]
class TestVersionedSimpleModelExcludeTests:
    """v2 excludes the `id` column, so only v1 keeps `["id"]` as its key."""

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model_v1.sql": simple_model_sql,
            "simple_model_v2.sql": simple_model_sql,
            "schema.yml": simple_model_two_versions_exclude_col,
        }
        return project_files

    def test_versioned_simple_model_exclude_col(self, project):
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        first_version = parsed_nodes["model.test.simple_model.v1"]
        second_version = parsed_nodes["model.test.simple_model.v2"]
        assert first_version.primary_key == ["id"]
        assert second_version.primary_key == []
class TestSimpleModelCombinationOfColumns:
    """A `unique_combination_of_columns` test yields a sorted two-column key."""

    @pytest.fixture(scope="class")
    def packages(self):
        # dbt_utils supplies the `unique_combination_of_columns` generic test.
        dbt_utils_package = {
            "git": "https://github.com/dbt-labs/dbt-utils.git",
            "revision": "1.1.0",
        }
        return {"packages": [dbt_utils_package]}

    @pytest.fixture(scope="class")
    def models(self):
        project_files = {
            "simple_model.sql": simple_model_sql,
            "schema.yml": simple_model_unique_combo_of_columns,
        }
        return project_files

    def test_versioned_simple_combo_of_columns(self, project):
        # `deps` must run first so the dbt_utils package is installed.
        for dbt_command in ("deps", "run"):
            run_dbt([dbt_command])
        parsed_nodes = get_manifest(project.project_root).nodes
        # The fixture lists [id, color]; the expected key is in sorted order.
        assert parsed_nodes["model.test.simple_model"].primary_key == ["color", "id"]

View File

@@ -74,6 +74,7 @@ REQUIRED_PARSED_NODE_KEYS = frozenset(
"raw_code",
"language",
"description",
"primary_key",
"columns",
"fqn",
"build_path",

View File

@@ -26,6 +26,7 @@ def model_node():
metrics=[],
depends_on=DependsOn(),
description="",
primary_key=[],
database="test_db",
schema="test_schema",
alias="bar",

View File

@@ -149,6 +149,7 @@ def basic_compiled_dict():
"depends_on": {"macros": [], "nodes": []},
"database": "test_db",
"description": "",
"primary_key": [],
"schema": "test_schema",
"alias": "bar",
"tags": [],

View File

@@ -167,6 +167,7 @@ def base_parsed_model_dict():
"depends_on": {"macros": [], "nodes": []},
"database": "test_db",
"description": "",
"primary_key": [],
"schema": "test_schema",
"alias": "bar",
"tags": [],
@@ -220,6 +221,7 @@ def basic_parsed_model_object():
metrics=[],
depends_on=DependsOn(),
description="",
primary_key=[],
database="test_db",
schema="test_schema",
alias="bar",
@@ -274,6 +276,7 @@ def complex_parsed_model_dict():
"depends_on": {"macros": [], "nodes": ["model.test.bar"]},
"database": "test_db",
"description": "My parsed node",
"primary_key": [],
"schema": "test_schema",
"alias": "bar",
"tags": ["tag"],