Merge pull request #537 from dbt-labs/move_freshness_under_config

This commit is contained in:
Benoit Perigaud
2025-08-11 21:25:03 +02:00
committed by GitHub
29 changed files with 275 additions and 169 deletions

View File

@@ -36,7 +36,7 @@ jobs:
SNOWFLAKE_ROLE: ${{ vars.SNOWFLAKE_ROLE }}
SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }}
SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }}
SNOWFLAKE_SCHEMA: "dbt_utils_integration_tests_snowflake_${{ github.run_number }}"
SNOWFLAKE_SCHEMA: "dbt_project_evaluator_integration_tests_snowflake_${{ github.run_number }}"
# databricks
DATABRICKS_SCHEMA: "integration_tests_databricks_${{ github.run_number }}"
DATABRICKS_HOST: ${{ vars.DATABRICKS_HOST }}

3
.gitignore vendored
View File

@@ -27,4 +27,5 @@ integration_tests/package-lock.yml
# duckdb
*.db
*.duckdb
*.duckdb
dbt_internal_packages/

View File

@@ -2,7 +2,7 @@ name: 'dbt_project_evaluator'
version: '1.0.0'
config-version: 2
require-dbt-version: [">=1.8.0", "<2.0.0"]
require-dbt-version: [">=1.10.6", "<3.0.0"]
model-paths: ["models"]
analysis-paths: ["analysis"]
@@ -16,6 +16,9 @@ clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
flags:
require_nested_cumulative_type_params: True
require_yaml_configuration_for_mf_time_spines: True
dispatch:
- macro_namespace: dbt

View File

@@ -28,13 +28,18 @@ dispatch:
- macro_namespace: dbt
search_order: ['dbt_project_evaluator', 'dbt']
flags:
require_nested_cumulative_type_params: True
require_yaml_configuration_for_mf_time_spines: True
require_generic_test_arguments_property: True
models:
dbt_project_evaluator_integration_tests:
# materialize as ephemeral to prevent the fake models from executing, but keep them enabled
+materialized: ephemeral
dbt_project_evaluator:
marts:
data_tests:
tests:
fct_test_coverage:
# materialize as a table to ensure SQL query runs successfully
+materialized: table

View File

@@ -1,7 +1,7 @@
models:
- name: excluded_model
access: public
config:
access: public
contract:
enforced: true
columns:

View File

@@ -6,10 +6,11 @@ exposures:
type: dashboard
url: dave.com/metrics
maturity: low
tags: ['proserv']
owner:
name: dave's davey
email: proserv@dbt.com
config:
tags: ['proserv']
depends_on:
- ref('fct_model_6')

View File

@@ -43,6 +43,7 @@ metrics:
type_params:
# Specify the measure you are creating a proxy for.
measure: total_count
window: 30 days
cumulative_type_params:
window: 30 days
filter: |
{{ Dimension('dim__id') }} > 0

View File

@@ -1,10 +1,15 @@
version: 2
groups:
- name: my_awesome_group
owner:
email: thegoat@jaffleshop.com
models:
- name: report_1
description: tom cruise's minority report
access: public
config:
access: public
materialized: table
columns:
- name: col
@@ -13,9 +18,10 @@ models:
- name: report_2
description: the colbert report
access: public
config:
access: public
materialized: table
group: my_awesome_group
contract:
enforced: true
columns:

View File

@@ -13,9 +13,10 @@ models:
- name: stg_model_3
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- id
- color
arguments:
combination_of_columns:
- id
- color
- name: stg_model_2
columns:
- name: id

View File

@@ -4,8 +4,9 @@ sources:
- name: source_1
description: this is source 1.
schema: real_schema
freshness: # default freshness
warn_after: {count: 12, period: hour}
config:
freshness: # default freshness
warn_after: {count: 12, period: hour}
# database: real_database
tables:
- name: table_1
@@ -16,7 +17,8 @@ sources:
- name: table_2
- name: table_4
- name: table_5
freshness: null
config:
freshness: null
- name: raw_table_5
identifier: table_5

View File

@@ -8,8 +8,9 @@ models:
- unique
- not_null
- relationships:
to: ref('stg_model_2')
field: id
arguments:
to: ref('stg_model_2')
field: id
- name: "concat(coalesce('fake_column', ' '),'for_testing')"
data_tests:
- not_null

View File

@@ -1,8 +1,11 @@
models:
- name: metricflow_time_spine
description: default time spine for metricflow
- name: all_days
description: A time spine with one row per day, ranging from 2020-01-01 to 2039-12-31.
time_spine:
standard_granularity_column: date_day # Column for the standard grain of your table
columns:
- name: date_day
granularity: day
data_tests:
- unique
- not_null

View File

@@ -1,4 +1,4 @@
-- metricflow_time_spine.sql
-- metricflow time spine
{% set date_expr = "current_date()" if target.type in ["duckdb"] else dbt.current_timestamp() %}
{{
config(

View File

@@ -2,10 +2,6 @@
# HEY! This file is used in the integration tests with CI.
# You should __NEVER__ check credentials into version control. Thanks for reading :)
config:
send_anonymous_usage_stats: False
use_colors: True
integration_tests:
target: postgres
outputs:

View File

@@ -4,100 +4,122 @@ seeds:
- name: test_fct_multiple_sources_joined
data_tests:
- dbt_utils.equality:
name: equality_fct_multiple_sources_joined
compare_model: ref('fct_multiple_sources_joined')
compare_columns:
- child
- "{{ 'source_parents' if target.type != 'databricks' else 'child' }}"
config:
name: equality_fct_multiple_sources_joined
arguments:
compare_model: ref('fct_multiple_sources_joined')
compare_columns:
- child
- "{{ 'source_parents' if target.type != 'databricks' else 'child' }}"
- name: test_fct_direct_join_to_source
data_tests:
- dbt_utils.equality:
name: equality_fct_direct_join_to_source
compare_model: ref('fct_direct_join_to_source')
compare_columns:
- parent
- parent_resource_type
- child
- child_resource_type
- distance
config:
name: equality_fct_direct_join_to_source
arguments:
compare_model: ref('fct_direct_join_to_source')
compare_columns:
- parent
- parent_resource_type
- child
- child_resource_type
- distance
- name: test_fct_duplicate_sources
data_tests:
- dbt_utils.equality:
name: equality_fct_duplicate_sources
compare_model: ref('fct_duplicate_sources')
compare_columns:
- source_names
config:
name: equality_fct_duplicate_sources
arguments:
compare_model: ref('fct_duplicate_sources')
compare_columns:
- source_names
- name: test_fct_root_models
data_tests:
- dbt_utils.equality:
name: equality_fct_root_models
compare_model: ref('fct_root_models')
config:
name: equality_fct_root_models
arguments:
compare_model: ref('fct_root_models')
- name: test_fct_unused_sources
data_tests:
- dbt_utils.equality:
name: equality_fct_unused_sources
compare_model: ref('fct_unused_sources')
config:
name: equality_fct_unused_sources
arguments:
compare_model: ref('fct_unused_sources')
- name: test_fct_source_fanout
data_tests:
- dbt_utils.equality:
name: equality_fct_source_fanout
compare_model: ref('fct_source_fanout')
compare_columns:
- parent
- "{{ 'model_children' if target.type != 'databricks' else 'parent' }}"
config:
name: equality_fct_source_fanout
arguments:
compare_model: ref('fct_source_fanout')
compare_columns:
- parent
- "{{ 'model_children' if target.type != 'databricks' else 'parent' }}"
- name: test_fct_model_fanout
data_tests:
- dbt_utils.equality:
name: equality_fct_model_fanout
compare_model: ref('fct_model_fanout')
compare_columns:
- parent
- parent_model_type
- "{{ 'leaf_children' if target.type != 'databricks' else 'parent_model_type' }}"
config:
name: equality_fct_model_fanout
arguments:
compare_model: ref('fct_model_fanout')
compare_columns:
- parent
- parent_model_type
- "{{ 'leaf_children' if target.type != 'databricks' else 'parent_model_type' }}"
- name: test_fct_staging_dependent_on_staging
data_tests:
- dbt_utils.equality:
name: equality_fct_staging_dependent_on_staging
compare_model: ref('fct_staging_dependent_on_staging')
compare_columns:
- parent
- parent_model_type
- child
- child_model_type
config:
name: equality_fct_staging_dependent_on_staging
arguments:
compare_model: ref('fct_staging_dependent_on_staging')
compare_columns:
- parent
- parent_model_type
- child
- child_model_type
- name: test_fct_rejoining_of_upstream_concepts
data_tests:
- dbt_utils.equality:
name: equality_fct_rejoining_of_upstream_concepts
compare_model: ref('fct_rejoining_of_upstream_concepts')
compare_columns:
- parent
- child
- parent_and_child
- is_loop_independent
config:
name: equality_fct_rejoining_of_upstream_concepts
arguments:
compare_model: ref('fct_rejoining_of_upstream_concepts')
compare_columns:
- parent
- child
- parent_and_child
- is_loop_independent
- name: test_fct_hard_coded_references
data_tests:
- dbt_utils.equality:
name: equality_fct_hard_coded_references
compare_model: ref('fct_hard_coded_references')
compare_columns:
- model
- hard_coded_references
config:
name: equality_fct_hard_coded_references
arguments:
compare_model: ref('fct_hard_coded_references')
compare_columns:
- model
- hard_coded_references
- name: test_fct_too_many_joins
data_tests:
- dbt_utils.equality:
name: equality_fct_too_many_joins
compare_model: ref('fct_too_many_joins')
compare_columns:
- resource_name
- file_path
- join_count
config:
name: equality_fct_too_many_joins
arguments:
compare_model: ref('fct_too_many_joins')
compare_columns:
- resource_name
- file_path
- join_count

View File

@@ -7,8 +7,10 @@ seeds:
- docs
data_tests:
- dbt_utils.equality:
name: equality_fct_undocumented_models
compare_model: ref('fct_undocumented_models')
config:
name: equality_fct_undocumented_models
arguments:
compare_model: ref('fct_undocumented_models')
- name: test_fct_documentation_coverage
config:
@@ -21,16 +23,18 @@ seeds:
- docs
data_tests:
- dbt_utils.equality:
name: equality_fct_documentation_coverage
compare_model: ref('fct_documentation_coverage')
compare_columns:
- total_models
- documented_models
- documentation_coverage_pct
- staging_documentation_coverage_pct
- intermediate_documentation_coverage_pct
- marts_documentation_coverage_pct
- other_documentation_coverage_pct
config:
name: equality_fct_documentation_coverage
arguments:
compare_model: ref('fct_documentation_coverage')
compare_columns:
- total_models
- documented_models
- documentation_coverage_pct
- staging_documentation_coverage_pct
- intermediate_documentation_coverage_pct
- marts_documentation_coverage_pct
- other_documentation_coverage_pct
- name: test_fct_undocumented_source_tables
config:
@@ -38,8 +42,10 @@ seeds:
- docs
data_tests:
- dbt_utils.equality:
name: equality_fct_undocumented_source_tables
compare_model: ref('fct_undocumented_source_tables')
config:
name: equality_fct_undocumented_source_tables
arguments:
compare_model: ref('fct_undocumented_source_tables')
- name: test_fct_undocumented_sources
config:
@@ -47,5 +53,7 @@ seeds:
- docs
data_tests:
- dbt_utils.equality:
name: equality_fct_undocumented_sources
compare_model: ref('fct_undocumented_sources')
config:
name: equality_fct_undocumented_sources
arguments:
compare_model: ref('fct_undocumented_sources')

View File

@@ -4,14 +4,19 @@ seeds:
- name: test_fct_public_models_without_contract
data_tests:
- dbt_utils.equality:
compare_model: ref('fct_public_models_without_contract')
arguments:
compare_model: ref('fct_public_models_without_contract')
- name: test_fct_undocumented_public_models
data_tests:
- dbt_utils.equality:
compare_model: ref('fct_undocumented_public_models')
config:
name: equality_fct_undocumented_public_models
arguments:
compare_model: ref('fct_undocumented_public_models')
- name: test_fct_exposures_dependent_on_private_models
data_tests:
- dbt_utils.equality:
compare_model: ref('fct_exposures_dependent_on_private_models')
arguments:
compare_model: ref('fct_exposures_dependent_on_private_models')

View File

@@ -4,14 +4,17 @@ seeds:
- name: test_fct_chained_views_dependencies
data_tests:
- dbt_utils.equality:
name: equality_fct_chained_views_dependencies
compare_model: ref('fct_chained_views_dependencies')
compare_columns:
- parent
- child
- distance
config:
name: equality_fct_chained_views_dependencies
arguments:
compare_model: ref('fct_chained_views_dependencies')
compare_columns:
- parent
- child
- distance
- name: test_fct_exposure_parents_materializations
data_tests:
- dbt_utils.equality:
compare_model: ref('fct_exposure_parents_materializations')
arguments:
compare_model: ref('fct_exposure_parents_materializations')

View File

@@ -4,37 +4,45 @@ seeds:
- name: test_fct_model_directories
data_tests:
- dbt_utils.equality:
name: equality_fct_model_directories
compare_model: ref('fct_model_directories')
compare_columns:
- resource_name
- current_file_path
- change_file_path_to
config:
name: equality_fct_model_directories
arguments:
compare_model: ref('fct_model_directories')
compare_columns:
- resource_name
- current_file_path
- change_file_path_to
- name: test_fct_model_naming_conventions
data_tests:
- dbt_utils.equality:
name: equality_fct_model_naming_conventions
compare_model: ref('fct_model_naming_conventions')
compare_columns:
- resource_name
- model_type
- "{{ 'appropriate_prefixes' if target.type != 'databricks' else 'model_type' }}"
config:
name: equality_fct_model_naming_conventions
arguments:
compare_model: ref('fct_model_naming_conventions')
compare_columns:
- resource_name
- model_type
- "{{ 'appropriate_prefixes' if target.type != 'databricks' else 'model_type' }}"
- name: test_fct_source_directories
data_tests:
- dbt_utils.equality:
name: equality_fct_source_directories
compare_model: ref('fct_source_directories')
compare_columns:
- resource_name
- current_file_path
- change_file_path_to
config:
name: equality_fct_source_directories
arguments:
compare_model: ref('fct_source_directories')
compare_columns:
- resource_name
- current_file_path
- change_file_path_to
- name: test_fct_test_directories
data_tests:
- dbt_utils.equality:
name: equality_fct_test_directories
compare_model: ref('fct_test_directories')
compare_columns:
- test_name
- model_name
- current_test_directory
- change_test_directory_to
config:
name: equality_fct_test_directories
arguments:
compare_model: ref('fct_test_directories')
compare_columns:
- test_name
- model_name
- current_test_directory
- change_test_directory_to

View File

@@ -4,11 +4,13 @@ seeds:
- name: test_fct_missing_primary_key_tests
data_tests:
- dbt_utils.equality:
name: equality_fct_missing_primary_key_tests
compare_model: ref('fct_missing_primary_key_tests')
exclude_columns:
- resource_type
- model_type
config:
name: equality_fct_missing_primary_key_tests
arguments:
compare_model: ref('fct_missing_primary_key_tests')
exclude_columns:
- resource_type
- model_type
- name: test_fct_test_coverage
config:
@@ -20,21 +22,25 @@ seeds:
other_test_coverage_pct: *float
data_tests:
- dbt_utils.equality:
name: equality_fct_test_coverage
compare_model: ref('fct_test_coverage')
compare_columns:
- total_models
- total_tests
- tested_models
- "{{ 'test_coverage_pct' if not target.name in ['duckdb', 'databricks', 'trino'] else 'tested_models' }}"
- test_to_model_ratio
- staging_test_coverage_pct
- intermediate_test_coverage_pct
- marts_test_coverage_pct
- other_test_coverage_pct
config:
name: equality_fct_test_coverage
arguments:
compare_model: ref('fct_test_coverage')
compare_columns:
- total_models
- total_tests
- tested_models
- "{{ 'test_coverage_pct' if not target.name in ['duckdb', 'databricks', 'trino'] else 'tested_models' }}"
- test_to_model_ratio
- staging_test_coverage_pct
- intermediate_test_coverage_pct
- marts_test_coverage_pct
- other_test_coverage_pct
- name: test_fct_sources_without_freshness
data_tests:
- dbt_utils.equality:
name: equality_fct_sources_without_freshness
compare_model: ref('fct_sources_without_freshness')
config:
name: equality_fct_sources_without_freshness
arguments:
compare_model: ref('fct_sources_without_freshness')

View File

@@ -30,6 +30,11 @@ dispatch:
on-run-end: "{{ dbt_project_evaluator.print_dbt_project_evaluator_issues() }}"
flags:
require_nested_cumulative_type_params: True
require_yaml_configuration_for_mf_time_spines: True
require_generic_test_arguments_property: True
models:
dbt_project_evaluator_integration_tests_2:
# materialize as ephemeral to prevent the fake models from executing, but keep them enabled

View File

@@ -2,10 +2,6 @@
# HEY! This file is used in the integration tests with CI.
# You should __NEVER__ check credentials into version control. Thanks for reading :)
config:
send_anonymous_usage_stats: False
use_colors: True
integration_tests:
target: postgres
outputs:

View File

@@ -26,8 +26,8 @@
wrap_string_with_quotes(node.type_params.numerator | tojson),
wrap_string_with_quotes(node.type_params.denominator | tojson),
wrap_string_with_quotes(node.type_params.expr),
wrap_string_with_quotes(node.type_params.window | tojson),
wrap_string_with_quotes(node.type_params.grain_to_date),
wrap_string_with_quotes((node.type_params.cumulative_type_params.window or node.type_params.window) | tojson),
wrap_string_with_quotes((node.type_params.cumulative_type_params.grain_to_date or node.type_params.grain_to_date)),
wrap_string_with_quotes(node.meta | tojson)
]
%}

View File

@@ -24,8 +24,14 @@
"cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")",
"cast(" ~ node.config.enabled ~ " as " ~ dbt.type_boolean() ~ ")",
wrap_string_with_quotes(node.loaded_at_field | replace("'", "_")),
"cast(" ~ ((node.freshness != None) and (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count)
or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count))) | trim ~ " as boolean)",
"cast(" ~ (
((node.config.freshness != None) and (dbt_project_evaluator.is_not_empty_string(node.config.freshness.warn_after.count)
or dbt_project_evaluator.is_not_empty_string(node.config.freshness.error_after.count)))
or ((node.freshness != None) and (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count)
or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count)))
) | trim ~ " as boolean)",
wrap_string_with_quotes(node.database),
wrap_string_with_quotes(node.schema),
wrap_string_with_quotes(node.package_name),

View File

@@ -1,5 +1,14 @@
-- this model finds cases where a model has 0 direct parents, likely due to a lack of source or ref function
-- Collect the names of the time spine models so they can be excluded below.
-- The list is built only when `execute` is true: it takes every graph node whose
-- resource_type is "model" and whose time_spine attribute is not none, keeps the
-- node name, and joins the names with ', ' so the result can be wrapped in a pair
-- of single quotes inside the `child not in (...)` filter further down.
-- NOTE(review): when `execute` is false the variable is never set; the filter
-- below is guarded by an `if` on this variable -- confirm that is the intent.
{% if execute %}
{% set metric_flow_time_spine_names = graph.nodes.values()
| selectattr("resource_type", "equalto", "model")
| rejectattr("time_spine", "none")
| map(attribute = "name")
| join("', '")
%}
{% endif %}
with model_relationships as (
select
*
@@ -9,7 +18,9 @@ with model_relationships as (
-- filtering parents could result in incorrectly flagging nodes that depend on excluded nodes
and not child_is_excluded
-- exclude required time spine
and child != 'metricflow_time_spine'
{% if metric_flow_time_spine_names %}
and child not in ('{{ metric_flow_time_spine_names }}')
{% endif %}
),
final as (

View File

@@ -10,9 +10,11 @@ models:
description: the number of models in the project with a description divided by the total number of models in the project
data_tests:
- dbt_utils.accepted_range:
name: valid_documentation_coverage
min_value: "{{ var('documentation_coverage_target') }}"
severity: warn
config:
name: valid_documentation_coverage
severity: warn
arguments:
min_value: "{{ var('documentation_coverage_target') }}"
- name: fct_undocumented_models
description: >
This model contains all models that do not have a description configured in a YML file.

View File

@@ -8,4 +8,5 @@ models:
the chain to table or incremental.
data_tests:
- is_empty:
severity: warn
config:
severity: warn

View File

@@ -1,11 +1,22 @@
-- all models with an inappropriate (or missing) prefix
-- ensure dbt project has consistent naming conventions
{% if execute %}
-- Names of the time spine models, used to exclude them from the naming checks.
-- Takes every graph node whose resource_type is "model" and whose time_spine
-- attribute is not none, keeps the node name, and joins the names with ', ' so
-- the result can be wrapped in a pair of single quotes inside the
-- `resource_name not in (...)` filter further down.
{% set metric_flow_time_spine_names = graph.nodes.values()
| selectattr("resource_type", "equalto", "model")
| rejectattr("time_spine", "none")
| map(attribute = "name")
| join("', '")
%}
{% endif %}
with all_graph_resources as (
select * from {{ ref('int_all_graph_resources') }}
where not is_excluded
-- exclude required metricflow time spine
and resource_name != 'metricflow_time_spine'
-- exclude required time spine
{% if metric_flow_time_spine_names %}
and resource_name not in ('{{ metric_flow_time_spine_names }}')
{% endif %}
),
naming_convention_prefixes as (

View File

@@ -14,9 +14,11 @@ models:
description: the number of models in the project with at least one test configured divided by the total number of models in the project
data_tests:
- dbt_utils.accepted_range:
name: valid_test_coverage
min_value: "{{ var('test_coverage_target') }}"
severity: warn
config:
name: valid_test_coverage
severity: warn
arguments:
min_value: "{{ var('test_coverage_target') }}"
- name: fct_missing_primary_key_tests
description: this model has one record for every model without unique and not null tests configured on a single column
data_tests: