Partial parse yaml snapshots (#10907)

This commit is contained in:
Gerda Shank
2024-10-23 14:16:33 -04:00
committed by GitHub
parent f7b7935a97
commit bdb79e8626
7 changed files with 82 additions and 6 deletions

View File

@@ -0,0 +1,6 @@
kind: Fixes
body: Implement partial parsing for all-yaml snapshots
time: 2024-10-22T22:29:27.396378-04:00
custom:
Author: gshank
Issue: "10903"

View File

@@ -192,6 +192,7 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
snapshots: List[str] = field(default_factory=list)
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.

View File

@@ -59,6 +59,7 @@ from dbt.contracts.graph.nodes import (
SeedNode,
SemanticModel,
SingularTestNode,
SnapshotNode,
SourceDefinition,
UnitTestDefinition,
UnitTestFileFixture,
@@ -1600,12 +1601,14 @@ class Manifest(MacroMethods, dbtClassMixin):
if isinstance(node, GenericTestNode):
assert test_from
source_file.add_test(node.unique_id, test_from)
if isinstance(node, Metric):
elif isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
if isinstance(node, Exposure):
elif isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
if isinstance(node, Group):
elif isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(node, SnapshotNode):
source_file.snapshots.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass
else:

View File

@@ -658,10 +658,14 @@ class PartialParsing:
key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
if key_diff["changed"]:
for elem in key_diff["changed"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)
if key_diff["deleted"]:
for elem in key_diff["deleted"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
if key_diff["added"]:
for elem in key_diff["added"]:
@@ -673,6 +677,8 @@ class PartialParsing:
continue
elem = self.get_schema_element(new_yaml_dict[dict_key], name)
if elem:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)
@@ -828,6 +834,8 @@ class PartialParsing:
# remove elem node and remove unique_id from node_patches
for elem_unique_id in elem_unique_ids:
# might have been already removed
# For all-yaml snapshots, we don't do this, since the node
# should have already been removed.
if (
elem_unique_id in self.saved_manifest.nodes
or elem_unique_id in self.saved_manifest.disabled
@@ -868,6 +876,19 @@ class PartialParsing:
self.saved_manifest.nodes.pop(test_unique_id)
schema_file.remove_tests(dict_key, name)
def delete_yaml_snapshot(self, schema_file, snapshot_dict):
    """Remove the saved entries for a snapshot defined in a schema YAML file.

    Walks the unique_ids recorded in ``schema_file.snapshots``:

    * an id present in ``saved_manifest.nodes`` is popped from the manifest
      and dropped from the file's list only when that node's ``name`` equals
      ``snapshot_dict["name"]``;
    * an id present in ``saved_manifest.disabled`` instead is passed to
      ``delete_disabled`` together with the file id and dropped from the
      file's list (no name comparison is made on this path).

    Ids found in neither collection are left in ``schema_file.snapshots``.
    """
    target_name = snapshot_dict["name"]
    # Iterate over a copy: entries are removed from the real list as we go.
    for node_id in list(schema_file.snapshots):
        if node_id in self.saved_manifest.nodes:
            saved_node = self.saved_manifest.nodes[node_id]
            if saved_node.name != target_name:
                continue
            self.saved_manifest.nodes.pop(node_id)
            schema_file.snapshots.remove(node_id)
            continue
        if node_id in self.saved_manifest.disabled:
            self.delete_disabled(node_id, schema_file.file_id)
            schema_file.snapshots.remove(node_id)
def delete_schema_source(self, schema_file, source_dict):
# both patches, tests, and source nodes
source_name = source_dict["name"]

View File

@@ -309,8 +309,9 @@ class SchemaParser(SimpleParser[YamlBlock, ModelNode]):
snapshot_node.raw_code = "select * from {{ " + snapshot["relation"] + " }}"
# Add our new node to the manifest, and note that ref lookup collections
# will need to be rebuilt.
self.manifest.add_node_nofile(snapshot_node)
# will need to be rebuilt. This adds the node unique_id to the "snapshots"
# list in the SchemaSourceFile.
self.manifest.add_node(block.file, snapshot_node)
rebuild_refs = True
if rebuild_refs:

View File

@@ -292,7 +292,6 @@ snapshots_pg__snapshot_sql = """
"""
snapshots_pg__snapshot_yml = """
version: 2
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
@@ -304,6 +303,18 @@ snapshots:
owner: 'a_owner'
"""
# YAML fixture defining the snapshot `snapshot_actual` over ref('seed') with a
# timestamp strategy and meta owner 'b_owner'.
snapshots_pg__snapshot_mod_yml = """
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
config:
unique_key: "id || '-' || first_name"
strategy: timestamp
updated_at: updated_at
meta:
owner: 'b_owner'
"""
snapshots_pg__snapshot_no_target_schema_sql = """
{% snapshot snapshot_actual %}

View File

@@ -8,6 +8,7 @@ from dbt.tests.util import (
check_relations_equal,
relation_from_name,
run_dbt,
update_config_file,
write_file,
)
from tests.functional.snapshots.fixtures import (
@@ -18,6 +19,7 @@ from tests.functional.snapshots.fixtures import (
models__schema_yml,
seeds__seed_csv,
seeds__seed_newcol_csv,
snapshots_pg__snapshot_mod_yml,
snapshots_pg__snapshot_no_target_schema_sql,
snapshots_pg__snapshot_sql,
snapshots_pg__snapshot_yml,
@@ -394,3 +396,34 @@ class BasicYaml(Basic):
class TestBasicSnapshotYaml(BasicYaml):
    """Calls snapshot_setup on a project built from the BasicYaml base class."""

    def test_basic_snapshot_yaml(self, project):
        """Invoke snapshot_setup with num_snapshot_models set to 1."""
        model_count = 1
        snapshot_setup(project, num_snapshot_models=model_count)
class TestYamlSnapshotPartialParsing(BasicYaml):
    """Re-parses a project after editing a YAML-defined snapshot and its config."""

    def test_snapshot_partial_parsing(self, project):
        """Parse three times, checking the snapshot node after each change.

        1. Initial parse: the node exists and its meta owner is 'a_owner'.
        2. After overwriting snapshots/snapshot.yml with the modified fixture:
           the meta owner is 'b_owner'.
        3. After updating dbt_project.yml with snapshot_meta_column_names:
           the node config reports the new dbt_valid_to column name.
        """
        node_id = "snapshot.test.snapshot_actual"

        # Step 1: baseline parse.
        parsed = run_dbt(["parse"])
        assert node_id in parsed.nodes
        assert parsed.nodes[node_id].meta["owner"] == "a_owner"

        # Step 2: change snapshot yml file and re-parse.
        write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml")
        parsed = run_dbt(["parse"])
        assert parsed.nodes[node_id].meta["owner"] == "b_owner"

        # Step 3: modify dbt_project.yml and re-parse.
        column_names = {
            "dbt_valid_to": "test_valid_to",
            "dbt_valid_from": "test_valid_from",
        }
        project_config = {
            "snapshots": {"test": {"+snapshot_meta_column_names": column_names}}
        }
        update_config_file(project_config, "dbt_project.yml")
        parsed = run_dbt(["parse"])
        node_config = parsed.nodes[node_id].config
        assert node_config.snapshot_meta_column_names.dbt_valid_to == "test_valid_to"