mirror of
https://github.com/dbt-labs/dbt-project-evaluator.git
synced 2025-12-18 02:11:27 +00:00
changed macro into model and built first fct_ model to check for dag issues
This commit is contained in:
@@ -1,93 +0,0 @@
|
|||||||
{% macro get_dependencies() %}
|
|
||||||
|
|
||||||
{%- set sql -%}
|
|
||||||
-- one record for each node in the DAG (models and sources) and its direct parent
|
|
||||||
with direct_relationships as (
|
|
||||||
|
|
||||||
{%- for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%}
|
|
||||||
{%- set outer_loop = loop -%}
|
|
||||||
|
|
||||||
{%- if model.depends_on.nodes|length == 0 -%}
|
|
||||||
|
|
||||||
select
|
|
||||||
'{{model.name}}' as model,
|
|
||||||
NULL as direct_parent,
|
|
||||||
NULL as direct_parent_type {# if this field still useful? if not could simply by looping through depends_on.nodes instead of refs & sources seperately #}
|
|
||||||
|
|
||||||
{%- else -%}
|
|
||||||
|
|
||||||
{%- for model_parent in model.refs -%}
|
|
||||||
|
|
||||||
select
|
|
||||||
'{{model.name}}' as model,
|
|
||||||
'{{model_parent.0}}' as direct_parent,
|
|
||||||
'model' as direct_parent_type
|
|
||||||
{% if not loop.last %}union all{% endif %}
|
|
||||||
|
|
||||||
{% endfor -%}
|
|
||||||
|
|
||||||
{%- for source_parent in model.sources -%}
|
|
||||||
|
|
||||||
{% if loop.first and model.refs|length > 0 %}union all{% endif %}
|
|
||||||
select
|
|
||||||
'{{model.name}}' as model,
|
|
||||||
'{{source_parent.0}}.{{source_parent.1}}' as direct_parent,
|
|
||||||
'source' as direct_parent_type
|
|
||||||
{% if not loop.last %}union all{% endif %}
|
|
||||||
|
|
||||||
{% endfor -%}
|
|
||||||
|
|
||||||
{%- endif -%}
|
|
||||||
|
|
||||||
{% if not outer_loop.last %}union all{% endif %}
|
|
||||||
|
|
||||||
{% endfor -%}
|
|
||||||
|
|
||||||
{%- for source in graph.sources.values() -%}
|
|
||||||
|
|
||||||
{% if loop.first and graph.nodes|length > 0 %}union all{% endif %}
|
|
||||||
select
|
|
||||||
'{{source.source_name}}.{{source.name}}' as model,
|
|
||||||
NULL as direct_parent,
|
|
||||||
NULL as direct_parent_type
|
|
||||||
{% if not loop.last %}union all{% endif %}
|
|
||||||
|
|
||||||
{% endfor -%}
|
|
||||||
|
|
||||||
),
|
|
||||||
|
|
||||||
-- recursive CTE
|
|
||||||
-- one record for every node and each of its downstream children (including itself)
|
|
||||||
all_relationships as (
|
|
||||||
|
|
||||||
-- anchor
|
|
||||||
select distinct
|
|
||||||
model as parent,
|
|
||||||
model as child,
|
|
||||||
0 as distance,
|
|
||||||
array_construct(child) as path {# snowflake-specific, but helpful for troubleshooting right now #}
|
|
||||||
from direct_relationships
|
|
||||||
-- where direct_parent is null {# optional lever to change filtering of anchor clause to only include root nodes #}
|
|
||||||
|
|
||||||
union all
|
|
||||||
|
|
||||||
-- recursive clause
|
|
||||||
select
|
|
||||||
all_relationships.parent as parent,
|
|
||||||
direct_relationships.model as child,
|
|
||||||
all_relationships.distance+1 as distance,
|
|
||||||
array_append(all_relationships.path, direct_relationships.model) as path
|
|
||||||
from direct_relationships
|
|
||||||
inner join all_relationships
|
|
||||||
on all_relationships.child = direct_relationships.direct_parent
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
select * from all_relationships
|
|
||||||
order by parent, distance
|
|
||||||
|
|
||||||
{%- endset -%}
|
|
||||||
|
|
||||||
{% do log(sql, info=true) %}
|
|
||||||
|
|
||||||
{% endmacro %}
|
|
||||||
9
models/audit/dag/dag.yml
Normal file
9
models/audit/dag/dag.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: stg_dag_relationships
|
||||||
|
description: "This table shows one record for every node and each of its downstream children (including itself)."
|
||||||
|
- name: fct_source_fanout
|
||||||
|
description: "This table shows each parent/child relationship where a source is the direct parent of multiple nodes in the DAG."
|
||||||
|
tests:
|
||||||
|
- is_empty
|
||||||
28
models/audit/dag/fct_source_fanout.sql
Normal file
28
models/audit/dag/fct_source_fanout.sql
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
-- this model finds cases where a source is used in multiple direct downstream models
|
||||||
|
with direct_source_relationships as (
|
||||||
|
select
|
||||||
|
*
|
||||||
|
from {{ ref('stg_dag_relationships') }}
|
||||||
|
where distance = 1
|
||||||
|
and parent_type = 'source'
|
||||||
|
),
|
||||||
|
|
||||||
|
source_fanout as (
|
||||||
|
select
|
||||||
|
parent,
|
||||||
|
count(*)
|
||||||
|
from direct_source_relationships
|
||||||
|
group by 1
|
||||||
|
having count(*) > 1
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
direct_source_relationships.*
|
||||||
|
from direct_source_relationships
|
||||||
|
inner join source_fanout
|
||||||
|
on direct_source_relationships.parent = source_fanout.parent
|
||||||
|
order by direct_source_relationships.parent
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
95
models/audit/dag/stg_dag_relationships.sql
Normal file
95
models/audit/dag/stg_dag_relationships.sql
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
-- TO DO: only include ENABLED nodes
|
||||||
|
-- TO DO: exclude models that are part of the audit package
|
||||||
|
-- can use package_name attribute in final version
|
||||||
|
-- TO DO: fix whitespace
|
||||||
|
|
||||||
|
-- one record for each node in the DAG (models and sources) and its direct parent
|
||||||
|
with direct_relationships as (
|
||||||
|
|
||||||
|
{%- for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%}
|
||||||
|
{%- set outer_loop = loop -%}
|
||||||
|
|
||||||
|
{%- if model.depends_on.nodes|length == 0 -%}
|
||||||
|
|
||||||
|
select
|
||||||
|
'{{model.name}}' as node,
|
||||||
|
'{{model.unique_id}}' as node_id,
|
||||||
|
'model' as node_type,
|
||||||
|
NULL as direct_parent_id
|
||||||
|
|
||||||
|
{%- else -%}
|
||||||
|
|
||||||
|
{%- for model_parent in model.depends_on.nodes -%}
|
||||||
|
|
||||||
|
select
|
||||||
|
'{{model.name}}' as node,
|
||||||
|
'{{model.unique_id}}' as node_id,
|
||||||
|
'model' as node_type,
|
||||||
|
'{{model_parent}}' as direct_parent_id
|
||||||
|
{% if not loop.last %}union all{% endif %}
|
||||||
|
|
||||||
|
{% endfor -%}
|
||||||
|
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{% if not outer_loop.last %}union all{% endif %}
|
||||||
|
|
||||||
|
{% endfor -%}
|
||||||
|
|
||||||
|
{%- for source in graph.sources.values() -%}
|
||||||
|
|
||||||
|
{% if loop.first and graph.nodes|length > 0 %}union all{% endif %}
|
||||||
|
select
|
||||||
|
'{{source.source_name}}.{{source.name}}' as node,
|
||||||
|
'{{source.unique_id}}' as node_id,
|
||||||
|
'source' as node_type,
|
||||||
|
NULL as direct_parent_id
|
||||||
|
{% if not loop.last %}union all{% endif %}
|
||||||
|
|
||||||
|
{% endfor -%}
|
||||||
|
|
||||||
|
),
|
||||||
|
|
||||||
|
-- recursive CTE
|
||||||
|
-- one record for every node and each of its downstream children (including itself)
|
||||||
|
all_relationships as (
|
||||||
|
-- anchor
|
||||||
|
select distinct
|
||||||
|
node as parent,
|
||||||
|
node_id as parent_id,
|
||||||
|
node_type as parent_type,
|
||||||
|
node as child,
|
||||||
|
node_id as child_id,
|
||||||
|
0 as distance,
|
||||||
|
array_construct(child) as path {# snowflake-specific, but helpful for troubleshooting right now #}
|
||||||
|
from direct_relationships
|
||||||
|
-- where direct_parent is null {# optional lever to change filtering of anchor clause to only include root nodes #}
|
||||||
|
|
||||||
|
union all
|
||||||
|
|
||||||
|
-- recursive clause
|
||||||
|
select
|
||||||
|
all_relationships.parent as parent,
|
||||||
|
all_relationships.parent_id as parent_id,
|
||||||
|
all_relationships.parent_type as parent_type,
|
||||||
|
direct_relationships.node as child,
|
||||||
|
direct_relationships.node_id as child_id,
|
||||||
|
all_relationships.distance+1 as distance,
|
||||||
|
array_append(all_relationships.path, direct_relationships.node) as path
|
||||||
|
from direct_relationships
|
||||||
|
inner join all_relationships
|
||||||
|
on all_relationships.child_id = direct_relationships.direct_parent_id
|
||||||
|
),
|
||||||
|
|
||||||
|
final as (
|
||||||
|
select
|
||||||
|
parent,
|
||||||
|
parent_type,
|
||||||
|
child,
|
||||||
|
distance,
|
||||||
|
path
|
||||||
|
from all_relationships
|
||||||
|
)
|
||||||
|
|
||||||
|
select * from final
|
||||||
|
order by parent, distance
|
||||||
11
tests/generic/test_is_empty.sql
Normal file
11
tests/generic/test_is_empty.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{% test is_empty(model) %}
|
||||||
|
|
||||||
|
{{ config (
|
||||||
|
severity = 'warn',
|
||||||
|
fail_calc = "n_records"
|
||||||
|
) }}
|
||||||
|
|
||||||
|
select count(*) as n_records
|
||||||
|
from {{ model }}
|
||||||
|
|
||||||
|
{% endtest %}
|
||||||
Reference in New Issue
Block a user