From 34209f6a7af56bf14c7cfa551099d9f34efae145 Mon Sep 17 00:00:00 2001 From: igorvoltaic Date: Tue, 3 Sep 2024 15:34:31 +0300 Subject: [PATCH] feat: add clickhouse and greenplum support (#460) Co-authored-by: Grace Goheen <53586774+graciegoheen@users.noreply.github.com> Co-authored-by: Benoit Perigaud <8754100+b-per@users.noreply.github.com> --- README.md | 4 +- dbt_project.yml | 4 +- docs/index.md | 2 + integration_tests/ci/sample.profiles.yml | 10 ++++ .../models/staging/_models.yml | 2 +- .../models/staging/excluded_model.sql | 2 +- .../marts/intermediate/_dim_model_7.yml | 2 +- .../models/marts/intermediate/dim_model_7.sql | 2 +- integration_tests/models/reports/reports.yml | 2 +- integration_tests_2/ci/sample.profiles.yml | 12 ++++- integration_tests_2/dbt_project.yml | 2 +- macros/cross_db_shim/clickhouse_shims.sql | 54 +++++++++++++++++++ macros/recursive_dag.sql | 10 ++-- macros/unpack/get_exposure_values.sql | 4 +- macros/unpack/get_metric_values.sql | 2 +- macros/unpack/get_node_values.sql | 10 ++-- macros/unpack/get_source_values.sql | 10 ++-- models/marts/core/int_all_graph_resources.sql | 2 +- .../marts/core/int_direct_relationships.sql | 13 ++++- .../fct_rejoining_of_upstream_concepts.sql | 4 +- models/marts/dag/fct_too_many_joins.sql | 2 +- .../fct_documentation_coverage.sql | 4 +- .../fct_model_naming_conventions.sql | 4 +- .../tests/fct_missing_primary_key_tests.sql | 4 +- models/marts/tests/fct_test_coverage.sql | 10 ++-- .../intermediate/int_model_test_summary.sql | 14 ++--- .../base/base_exposure_relationships.sql | 4 +- .../graph/base/base_metric_relationships.sql | 4 +- .../staging/graph/base/base_node_columns.sql | 2 +- .../graph/base/base_node_relationships.sql | 4 +- models/staging/graph/stg_exposures.sql | 4 +- models/staging/graph/stg_metrics.sql | 4 +- models/staging/graph/stg_nodes.sql | 12 ++--- models/staging/graph/stg_sources.sql | 12 ++--- seeds/seeds.yml | 8 +-- 35 files changed, 169 insertions(+), 76 deletions(-) create mode 
100644 macros/cross_db_shim/clickhouse_shims.sql diff --git a/README.md b/README.md index f12edfb..42d11f5 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Currently, the following adapters are supported: - DuckDB - Trino (tested with Iceberg connector) - AWS Athena (tested manually) +- Greenplum (tested manually) +- ClickHouse (tested manually) ## Using This Package @@ -29,7 +31,7 @@ Currently, the following adapters are supported: Check [dbt Hub](https://hub.getdbt.com/dbt-labs/dbt_project_evaluator/latest/) for the latest installation instructions, or [read the docs](https://docs.getdbt.com/docs/package-management) for more information on installing packages. -### Additional setup for Databricks/Spark/DuckDB +### Additional setup for Databricks/Spark/DuckDB/Redshift/ClickHouse In your `dbt_project.yml`, add the following config: diff --git a/dbt_project.yml b/dbt_project.yml index 4eb72eb..fbb1510 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -33,7 +33,7 @@ models: +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}" int_all_dag_relationships: # required for BigQuery, Redshift, and Databricks for performance/memory reasons - +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks'] else 'view' }}" + +materialized: "{{ 'table' if target.type in ['bigquery', 'redshift', 'databricks', 'clickhouse'] else 'view' }}" dag: +materialized: table staging: @@ -86,7 +86,7 @@ vars: # -- Execution variables -- insert_batch_size: "{{ 500 if target.type in ['athena', 'bigquery'] else 10000 }}" - max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino'] else -1 }}" + max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type in ['athena', 'trino', 'clickhouse'] else -1 }}" # -- Code complexity variables -- comment_chars: ["--"] diff --git a/docs/index.md b/docs/index.md index 457aa48..7e2cb24 
100644 --- a/docs/index.md +++ b/docs/index.md @@ -23,6 +23,8 @@ Currently, the following adapters are supported: - DuckDB - Trino (tested with Iceberg connector) - AWS Athena (tested manually) +- Greenplum (tested manually) +- ClickHouse (tested manually) ## Using This Package diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index 392271f..dff4277 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -72,3 +72,13 @@ integration_tests: threads: 5 session_properties: query_max_stage_count: 275 + + clickhouse: + type: clickhouse + host: "{{ env_var('CLICKHOUSE_TEST_HOST') }}" + port: "{{ env_var('CLICKHOUSE_TEST_PORT') | as_number }}" + user: "{{ env_var('CLICKHOUSE_TEST_USER') }}" + password: "{{ env_var('CLICKHOUSE_TEST_PASS') }}" + dbname: "{{ env_var('CLICKHOUSE_TEST_DBNAME') }}" + schema: dbt_project_evaluator_integration_tests_clickhouse + threads: 5 diff --git a/integration_tests/exclude_package/models/staging/_models.yml b/integration_tests/exclude_package/models/staging/_models.yml index bbfb10d..8794dc1 100644 --- a/integration_tests/exclude_package/models/staging/_models.yml +++ b/integration_tests/exclude_package/models/staging/_models.yml @@ -6,4 +6,4 @@ models: enforced: true columns: - name: id - data_type: integer \ No newline at end of file + data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'integer' }}" diff --git a/integration_tests/exclude_package/models/staging/excluded_model.sql b/integration_tests/exclude_package/models/staging/excluded_model.sql index 9db788d..bfac195 100644 --- a/integration_tests/exclude_package/models/staging/excluded_model.sql +++ b/integration_tests/exclude_package/models/staging/excluded_model.sql @@ -1,2 +1,2 @@ -- {{ source('fake_source', 'fake_source') }} -select 1 as id \ No newline at end of file +select 1 as id diff --git a/integration_tests/models/marts/intermediate/_dim_model_7.yml 
b/integration_tests/models/marts/intermediate/_dim_model_7.yml index 6707dd1..8c401f0 100644 --- a/integration_tests/models/marts/intermediate/_dim_model_7.yml +++ b/integration_tests/models/marts/intermediate/_dim_model_7.yml @@ -5,7 +5,7 @@ models: enforced: true columns: - name: id - data_type: int + data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'int' }}" constraints: - type: not_null tests: diff --git a/integration_tests/models/marts/intermediate/dim_model_7.sql b/integration_tests/models/marts/intermediate/dim_model_7.sql index 3ddf87e..82a9e25 100644 --- a/integration_tests/models/marts/intermediate/dim_model_7.sql +++ b/integration_tests/models/marts/intermediate/dim_model_7.sql @@ -5,5 +5,5 @@ ) }} +-- {{ ref('int_model_5') }} select * from {{ ref('stg_model_4') }} --- {{ ref('int_model_5') }} \ No newline at end of file diff --git a/integration_tests/models/reports/reports.yml b/integration_tests/models/reports/reports.yml index 4c832db..6b2c7a7 100644 --- a/integration_tests/models/reports/reports.yml +++ b/integration_tests/models/reports/reports.yml @@ -20,4 +20,4 @@ models: enforced: true columns: - name: id - data_type: integer \ No newline at end of file + data_type: "{{ 'UInt8' if target.type in ['clickhouse'] else 'integer' }}" diff --git a/integration_tests_2/ci/sample.profiles.yml b/integration_tests_2/ci/sample.profiles.yml index e3e06b0..ab0bf52 100644 --- a/integration_tests_2/ci/sample.profiles.yml +++ b/integration_tests_2/ci/sample.profiles.yml @@ -54,4 +54,14 @@ integration_tests: host: "{{ env_var('DATABRICKS_TEST_HOST') }}" http_path: "{{ env_var('DATABRICKS_TEST_HTTP_PATH') }}" token: "{{ env_var('DATABRICKS_TEST_ACCESS_TOKEN') }}" - threads: 10 \ No newline at end of file + threads: 10 + + clickhouse: + type: clickhouse + host: "{{ env_var('CLICKHOUSE_TEST_HOST') }}" + port: "{{ env_var('CLICKHOUSE_TEST_PORT') | as_number }}" + user: "{{ env_var('CLICKHOUSE_TEST_USER') }}" + password: "{{ env_var('CLICKHOUSE_TEST_PASS') 
}}" + dbname: "{{ env_var('CLICKHOUSE_TEST_DBNAME') }}" + schema: dbt_project_evaluator_integration_tests_clickhouse + threads: 5 diff --git a/integration_tests_2/dbt_project.yml b/integration_tests_2/dbt_project.yml index 1f656a8..e654169 100644 --- a/integration_tests_2/dbt_project.yml +++ b/integration_tests_2/dbt_project.yml @@ -38,4 +38,4 @@ models: vars: max_depth_dag: 2 chained_views_threshold: 2 - primary_key_test_macros: [["my_package.test_my_test", "dbt.test_not_null"]] \ No newline at end of file + primary_key_test_macros: [["my_package.test_my_test", "dbt.test_not_null"]] diff --git a/macros/cross_db_shim/clickhouse_shims.sql b/macros/cross_db_shim/clickhouse_shims.sql new file mode 100644 index 0000000..c19027f --- /dev/null +++ b/macros/cross_db_shim/clickhouse_shims.sql @@ -0,0 +1,54 @@ +{%- macro clickhouse__type_string() -%} + {{ 'Nullable(String)' }} +{%- endmacro %} + +{%- macro clickhouse__type_int() -%} + {{ 'Nullable(Int32)' }} +{%- endmacro %} + +{%- macro clickhouse__type_float() -%} + {{ 'Nullable(Float32)' }} +{%- endmacro %} + +{%- macro clickhouse__type_boolean() -%} + {{ 'Nullable(Bool)' }} +{%- endmacro %} + +{% macro clickhouse__replace(string_text, pattern, replacement) -%} + replaceAll(assumeNotNull({{string_text}}), {{pattern}}, {{replacement}}) +{%- endmacro %} + +{% macro clickhouse__split_part(string_text, delimiter_text, part_number) -%} + splitByChar({{delimiter_text}}, assumeNotNull({{string_text}}))[{{part_number}}] +{%- endmacro %} + +{% macro clickhouse__listagg(measure, delimiter_text, order_by_clause, limit_num) -%} + {% if order_by_clause and ' by ' in order_by_clause -%} + {% set order_by_field = order_by_clause.split(' by ')[1] %} + {% set arr = "arrayMap(x -> x.1, arrayReverseSort(x -> x.2, arrayZip(array_agg({}), array_agg({}))))".format(measure, order_by_field) %} {#- zip each measure value with its order-by key, sort on the key (x.2), then keep only the measure (x.1) #} + {% else -%} + {% set arr = "array_agg({})".format(measure) %} + {%- endif %} + + {% if limit_num -%} + arrayStringConcat(arraySlice({{ arr }}, 1, {{
limit_num }}), {{delimiter_text}}) + {% else -%} + arrayStringConcat({{ arr }}, {{delimiter_text}}) + {%- endif %} +{%- endmacro %} + +{% macro clickhouse__load_csv_rows(model, agate_table) %} + {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %} + {% set data_sql = adapter.get_csv_data(agate_table) %} + + {% if data_sql %} + {% set sql -%} + insert into {{ this.render() }} ({{ cols_sql }}) + {{ adapter.get_model_query_settings(model) }} + format CSV + {{ data_sql }} + {%- endset %} + + {% do adapter.add_query(sql, bindings=agate_table, abridge_sql_log=True) %} + {% endif %} +{% endmacro %} diff --git a/macros/recursive_dag.sql b/macros/recursive_dag.sql index 38d1614..360990e 100644 --- a/macros/recursive_dag.sql +++ b/macros/recursive_dag.sql @@ -72,7 +72,7 @@ all_relationships ( is_excluded as child_is_excluded, 0 as distance, {{ dbt.array_construct(['resource_name']) }} as path, - cast(null as boolean) as is_dependent_on_chain_of_views + cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views from direct_relationships -- where direct_parent_id is null {# optional lever to change filtering of anchor clause to only include root resources #} @@ -175,7 +175,7 @@ with direct_relationships as ( child_is_excluded, 0 as distance, {{ dbt.array_construct(['resource_name']) }} as path, - cast(null as boolean) as is_dependent_on_chain_of_views + cast(null as {{ dbt.type_boolean() }}) as is_dependent_on_chain_of_views from get_distinct ) @@ -243,7 +243,7 @@ with direct_relationships as ( child.directory_path as child_directory_path, child.file_name as child_file_name, child.is_excluded as child_is_excluded, - all_relationships_unioned.distance, + cast(all_relationships_unioned.distance as {{ dbt.type_int() }}) as distance, all_relationships_unioned.path, all_relationships_unioned.is_dependent_on_chain_of_views @@ -257,6 +257,10 @@ with direct_relationships as ( {% endmacro %} +{% macro clickhouse__recursive_dag() %} + {{
return(bigquery__recursive_dag()) }} +{% endmacro %} + {% macro spark__recursive_dag() %} -- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs {{ return(bigquery__recursive_dag()) }} diff --git a/macros/unpack/get_exposure_values.sql b/macros/unpack/get_exposure_values.sql index 7b8c9e3..e9ca70e 100644 --- a/macros/unpack/get_exposure_values.sql +++ b/macros/unpack/get_exposure_values.sql @@ -17,7 +17,7 @@ wrap_string_with_quotes(node.name), wrap_string_with_quotes(node.resource_type), wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")), - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")", wrap_string_with_quotes(node.type), wrap_string_with_quotes(node.maturity), wrap_string_with_quotes(node.package_name), @@ -35,4 +35,4 @@ {{ return(values) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/macros/unpack/get_metric_values.sql b/macros/unpack/get_metric_values.sql index c713c04..393b69f 100644 --- a/macros/unpack/get_metric_values.sql +++ b/macros/unpack/get_metric_values.sql @@ -16,7 +16,7 @@ wrap_string_with_quotes(node.name), wrap_string_with_quotes(node.resource_type), wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")), - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")", wrap_string_with_quotes(node.type), wrap_string_with_quotes(dbt.escape_single_quotes(node.label)), wrap_string_with_quotes(node.package_name), diff --git a/macros/unpack/get_node_values.sql b/macros/unpack/get_node_values.sql index 1bd5e34..1ef899e 100644 --- a/macros/unpack/get_node_values.sql +++ b/macros/unpack/get_node_values.sql @@ -23,7 +23,7 @@ 
wrap_string_with_quotes(node.name), wrap_string_with_quotes(node.resource_type), wrap_string_with_quotes(node.original_file_path | replace("\\","\\\\")), - "cast(" ~ node.config.enabled | trim ~ " as boolean)", + "cast(" ~ node.config.enabled | trim ~ " as " ~ dbt.type_boolean() ~ ")", wrap_string_with_quotes(node.config.materialized), wrap_string_with_quotes(node.config.on_schema_change), wrap_string_with_quotes(node.group), @@ -31,22 +31,22 @@ wrap_string_with_quotes(node.latest_version), wrap_string_with_quotes(node.version), wrap_string_with_quotes(node.deprecation_date), - "cast(" ~ contract | trim ~ " as boolean)", + "cast(" ~ contract | trim ~ " as " ~ dbt.type_boolean() ~ ")", node.columns.values() | list | length, node.columns.values() | list | selectattr('description') | list | length, wrap_string_with_quotes(node.database), wrap_string_with_quotes(node.schema), wrap_string_with_quotes(node.package_name), wrap_string_with_quotes(node.alias), - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")", "''" if not node.column_name else wrap_string_with_quotes(dbt.escape_single_quotes(node.column_name)), wrap_string_with_quotes(node.meta | tojson), wrap_string_with_quotes(dbt.escape_single_quotes(hard_coded_references)), number_lines, sql_complexity, wrap_string_with_quotes(node.get('depends_on',{}).get('macros',[]) | tojson), - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.test_metadata) | trim ~ " as boolean)", - "cast(" ~ exclude_node ~ " as boolean)", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.test_metadata) | trim ~ " as " ~ dbt.type_boolean() ~ ")", + "cast(" ~ exclude_node ~ " as " ~ dbt.type_boolean() ~ ")", ] %} diff --git a/macros/unpack/get_source_values.sql b/macros/unpack/get_source_values.sql index 00b5f5c..aa97db3 100644 --- a/macros/unpack/get_source_values.sql +++ 
b/macros/unpack/get_source_values.sql @@ -20,9 +20,9 @@ wrap_string_with_quotes(node.alias), wrap_string_with_quotes(node.resource_type), wrap_string_with_quotes(node.source_name), - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.source_description) | trim ~ " as boolean)", - "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as boolean)", - "cast(" ~ node.config.enabled ~ " as boolean)", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.source_description) | trim ~ " as " ~ dbt.type_boolean() ~ ")", + "cast(" ~ dbt_project_evaluator.is_not_empty_string(node.description) | trim ~ " as " ~ dbt.type_boolean() ~ ")", + "cast(" ~ node.config.enabled ~ " as " ~ dbt.type_boolean() ~ ")", wrap_string_with_quotes(node.loaded_at_field | replace("'", "_")), "cast(" ~ ((node.freshness != None) and (dbt_project_evaluator.is_not_empty_string(node.freshness.warn_after.count) or dbt_project_evaluator.is_not_empty_string(node.freshness.error_after.count))) | trim ~ " as boolean)", @@ -32,7 +32,7 @@ wrap_string_with_quotes(node.loader), wrap_string_with_quotes(node.identifier), wrap_string_with_quotes(node.meta | tojson), - "cast(" ~ exclude_source ~ " as boolean)", + "cast(" ~ exclude_source ~ " as " ~ dbt.type_boolean() ~ ")", ] %} @@ -44,4 +44,4 @@ {{ return(values) }} -{%- endmacro -%} \ No newline at end of file +{%- endmacro -%} diff --git a/models/marts/core/int_all_graph_resources.sql b/models/marts/core/int_all_graph_resources.sql index 1f08010..0168144 100644 --- a/models/marts/core/int_all_graph_resources.sql +++ b/models/marts/core/int_all_graph_resources.sql @@ -62,7 +62,7 @@ joined as ( unioned_with_calc.file_name, case when unioned_with_calc.resource_type in ('test', 'source', 'metric', 'exposure', 'seed') then null - else naming_convention_prefixes.model_type + else nullif(naming_convention_prefixes.model_type, '') end as model_type_prefix, case when unioned_with_calc.resource_type in ('test', 'source', 'metric', 
'exposure', 'seed') then null diff --git a/models/marts/core/int_direct_relationships.sql b/models/marts/core/int_direct_relationships.sql index f138ae5..cf98082 100644 --- a/models/marts/core/int_direct_relationships.sql +++ b/models/marts/core/int_direct_relationships.sql @@ -45,7 +45,18 @@ direct_metrics_relationships as ( -- for all resources in the graph, find their direct parent direct_relationships as ( select - all_graph_resources.*, + all_graph_resources.resource_id, + all_graph_resources.resource_name, + all_graph_resources.resource_type, + all_graph_resources.file_path, + all_graph_resources.directory_path, + all_graph_resources.file_name, + all_graph_resources.model_type, + all_graph_resources.materialized, + all_graph_resources.is_public, + all_graph_resources.access, + all_graph_resources.source_name, + all_graph_resources.is_excluded, case when all_graph_resources.resource_type = 'source' then null when all_graph_resources.resource_type = 'exposure' then exposures.direct_parent_id diff --git a/models/marts/dag/fct_rejoining_of_upstream_concepts.sql b/models/marts/dag/fct_rejoining_of_upstream_concepts.sql index 00130b7..65a1287 100644 --- a/models/marts/dag/fct_rejoining_of_upstream_concepts.sql +++ b/models/marts/dag/fct_rejoining_of_upstream_concepts.sql @@ -33,7 +33,7 @@ single_use_resources as ( triad_relationships as ( select rejoined.parent, - rejoined.child as child, + rejoined.child, direct_child.parent as parent_and_child from rejoined left join all_relationships as direct_child @@ -66,4 +66,4 @@ final_filtered as ( select * from final_filtered -{{ filter_exceptions() }} \ No newline at end of file +{{ filter_exceptions() }} diff --git a/models/marts/dag/fct_too_many_joins.sql b/models/marts/dag/fct_too_many_joins.sql index c7ea383..6ce7261 100644 --- a/models/marts/dag/fct_too_many_joins.sql +++ b/models/marts/dag/fct_too_many_joins.sql @@ -10,7 +10,7 @@ final as ( select child as resource_name, child_file_path as file_path, - 
count(distinct parent) as join_count + cast(count(distinct parent) as {{ dbt.type_int() }}) as join_count from all_dag_relationships where distance = 1 group by 1, 2 diff --git a/models/marts/documentation/fct_documentation_coverage.sql b/models/marts/documentation/fct_documentation_coverage.sql index 26a4fd2..eeae0cc 100644 --- a/models/marts/documentation/fct_documentation_coverage.sql +++ b/models/marts/documentation/fct_documentation_coverage.sql @@ -21,8 +21,8 @@ conversion as ( final as ( select {{ dbt.current_timestamp() if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at, - count(*) as total_models, - sum(is_described_model) as documented_models, + cast(count(*) as {{ dbt.type_int() }}) as total_models, + cast(sum(is_described_model) as {{ dbt.type_int() }}) as documented_models, round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct, {% for model_type in var('model_types') %} round( diff --git a/models/marts/structure/fct_model_naming_conventions.sql b/models/marts/structure/fct_model_naming_conventions.sql index 02d0885..0692a41 100644 --- a/models/marts/structure/fct_model_naming_conventions.sql +++ b/models/marts/structure/fct_model_naming_conventions.sql @@ -48,10 +48,10 @@ inappropriate_model_names as ( from models left join appropriate_prefixes on models.model_type = appropriate_prefixes.model_type - where models.prefix_value is null + where nullif(models.prefix_value, '') is null ) select * from inappropriate_model_names -{{ filter_exceptions() }} \ No newline at end of file +{{ filter_exceptions() }} diff --git a/models/marts/tests/fct_missing_primary_key_tests.sql b/models/marts/tests/fct_missing_primary_key_tests.sql index c13eaff..f756df9 100644 --- a/models/marts/tests/fct_missing_primary_key_tests.sql +++ b/models/marts/tests/fct_missing_primary_key_tests.sql @@ -12,7 +12,7 @@ tests as ( final as ( select - * + resource_name, is_primary_key_tested, number_of_tests_on_model, 
number_of_constraints_on_model from tests where not(is_primary_key_tested) @@ -20,4 +20,4 @@ final as ( select * from final -{{ filter_exceptions() }} \ No newline at end of file +{{ filter_exceptions() }} diff --git a/models/marts/tests/fct_test_coverage.sql b/models/marts/tests/fct_test_coverage.sql index be54ad5..2638520 100644 --- a/models/marts/tests/fct_test_coverage.sql +++ b/models/marts/tests/fct_test_coverage.sql @@ -8,7 +8,7 @@ test_counts as ( conversion as ( select resource_name, - case when number_of_tests_on_model > 0 then 1 else 0 end as is_tested_model, + cast(case when number_of_tests_on_model > 0 then 1 else 0 end as {{ dbt.type_boolean() }}) as is_tested_model, {% for model_type in var('model_types') %} case when model_type = '{{ model_type }}' then 1.0 else NULL end as is_{{ model_type }}_model, case when number_of_tests_on_model > 0 and model_type = '{{ model_type }}' then 1.0 else 0 end as is_tested_{{ model_type }}_model{% if not loop.last %},{% endif %} @@ -20,10 +20,10 @@ conversion as ( final as ( select {{ dbt.current_timestamp() if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at, - count(*) as total_models, - sum(number_of_tests_on_model) as total_tests, - sum(is_tested_model) as tested_models, - round(sum(is_tested_model) * 100.0 / count(*), 2) as test_coverage_pct, + cast(count(*) as {{ dbt.type_int() }}) as total_models, + cast(sum(number_of_tests_on_model) as {{ dbt.type_int() }}) as total_tests, + sum(cast(is_tested_model as {{ dbt.type_int() }})) as tested_models, + round(sum(cast(is_tested_model as {{ dbt.type_int() }})) * 100.0 / count(*), 2) as test_coverage_pct, {% for model_type in var('model_types') %} round( {{ dbt_utils.safe_divide( diff --git a/models/marts/tests/intermediate/int_model_test_summary.sql b/models/marts/tests/intermediate/int_model_test_summary.sql index 8cc6412..db96892 100644 --- a/models/marts/tests/intermediate/int_model_test_summary.sql +++ 
b/models/marts/tests/intermediate/int_model_test_summary.sql @@ -70,7 +70,7 @@ agg_test_relationships as ( select direct_parent_id, - sum(case + cast(sum(case when ( {%- for test_set in var('primary_key_test_macros') %} {%- set compare_value = test_set | length %} @@ -81,9 +81,9 @@ agg_test_relationships as ( ) then 1 else 0 end - ) >= 1 as is_primary_key_tested, - sum(tests_count) as number_of_tests_on_model, - sum(constraints_count) as number_of_constraints_on_model + ) >= 1 as {{ dbt.type_boolean() }}) as is_primary_key_tested, + cast(sum(tests_count) as {{ dbt.type_int()}}) as number_of_tests_on_model, + cast(sum(constraints_count) as {{ dbt.type_int()}}) as number_of_constraints_on_model from combine_column_counts group by 1 @@ -94,9 +94,9 @@ final as ( all_graph_resources.resource_name, all_graph_resources.resource_type, all_graph_resources.model_type, - coalesce(agg_test_relationships.is_primary_key_tested, FALSE) as is_primary_key_tested, - coalesce(agg_test_relationships.number_of_tests_on_model, 0) as number_of_tests_on_model, - coalesce(agg_test_relationships.number_of_constraints_on_model, 0) as number_of_constraints_on_model + cast(coalesce(agg_test_relationships.is_primary_key_tested, FALSE) as {{ dbt.type_boolean()}}) as is_primary_key_tested, + cast(coalesce(agg_test_relationships.number_of_tests_on_model, 0) as {{ dbt.type_int()}}) as number_of_tests_on_model, + cast(coalesce(agg_test_relationships.number_of_constraints_on_model, 0) as {{ dbt.type_int()}}) as number_of_constraints_on_model from all_graph_resources left join agg_test_relationships on all_graph_resources.resource_id = agg_test_relationships.direct_parent_id diff --git a/models/staging/graph/base/base_exposure_relationships.sql b/models/staging/graph/base/base_exposure_relationships.sql index 15b147a..87f9f40 100644 --- a/models/staging/graph/base/base_exposure_relationships.sql +++ b/models/staging/graph/base/base_exposure_relationships.sql @@ -17,7 +17,7 @@ with dummy_cte as ( 
select cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as resource_id, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as direct_parent_id, - cast(True as boolean) as is_primary_relationship + cast(True as {{ dbt.type_boolean() }}) as is_primary_relationship from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/base/base_metric_relationships.sql b/models/staging/graph/base/base_metric_relationships.sql index 72fdc56..084dace 100644 --- a/models/staging/graph/base/base_metric_relationships.sql +++ b/models/staging/graph/base/base_metric_relationships.sql @@ -17,7 +17,7 @@ with dummy_cte as ( select cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as resource_id, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as direct_parent_id, - cast(True as boolean) as is_primary_relationship + cast(True as {{ dbt.type_boolean() }}) as is_primary_relationship from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/base/base_node_columns.sql b/models/staging/graph/base/base_node_columns.sql index 817021e..b9c33c9 100644 --- a/models/staging/graph/base/base_node_columns.sql +++ b/models/staging/graph/base/base_node_columns.sql @@ -19,7 +19,7 @@ select cast(null as {{ dbt_project_evaluator.type_large_string()}}) as description, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as data_type, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as constraints, - cast(True as boolean) as has_not_null_constraint, + cast(True as {{ dbt.type_boolean() }}) as has_not_null_constraint, cast(0 as {{ dbt.type_int() }}) as constraints_count, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as quote diff --git a/models/staging/graph/base/base_node_relationships.sql b/models/staging/graph/base/base_node_relationships.sql index ad74a52..47b6646 100644 --- a/models/staging/graph/base/base_node_relationships.sql +++ 
b/models/staging/graph/base/base_node_relationships.sql @@ -16,7 +16,7 @@ with dummy_cte as ( select cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as resource_id, cast(null as {{ dbt_project_evaluator.type_string_dpe()}}) as direct_parent_id, - cast(True as boolean) as is_primary_relationship + cast(True as {{ dbt.type_boolean() }}) as is_primary_relationship from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/stg_exposures.sql b/models/staging/graph/stg_exposures.sql index d36cc1e..6d16f7b 100644 --- a/models/staging/graph/stg_exposures.sql +++ b/models/staging/graph/stg_exposures.sql @@ -19,7 +19,7 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as resource_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as file_path, - cast(True as boolean) as is_described, + cast(True as {{ dbt.type_boolean() }}) as is_described, cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as exposure_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as maturity, cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as package_name, @@ -29,4 +29,4 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }} ) as meta from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/stg_metrics.sql b/models/staging/graph/stg_metrics.sql index d154983..b8c008b 100644 --- a/models/staging/graph/stg_metrics.sql +++ b/models/staging/graph/stg_metrics.sql @@ -19,7 +19,7 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as resource_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as file_path, - cast(True as boolean) as is_described, + cast(True as {{ dbt.type_boolean() }}) as is_described, cast(null as {{ 
dbt_project_evaluator.type_string_dpe() }}) as metric_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as label, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as package_name, @@ -34,4 +34,4 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as meta from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/stg_nodes.sql b/models/staging/graph/stg_nodes.sql index c3165a1..5c792b4 100644 --- a/models/staging/graph/stg_nodes.sql +++ b/models/staging/graph/stg_nodes.sql @@ -20,7 +20,7 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as resource_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as file_path, - cast(True as boolean) as is_enabled, + cast(True as {{ dbt.type_boolean() }}) as is_enabled, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as materialized, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as on_schema_change, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as model_group, @@ -28,22 +28,22 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as latest_version, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as version, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as deprecation_date, - cast(True as boolean) as is_contract_enforced, + cast(True as {{ dbt.type_boolean() }}) as is_contract_enforced, cast(0 as {{ dbt.type_int() }}) as total_defined_columns, cast(0 as {{ dbt.type_int() }}) as total_described_columns, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as database, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as schema, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as package_name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as alias, - cast(True as boolean) as is_described, + cast(True as {{ 
dbt.type_boolean() }}) as is_described, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as column_name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as meta, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as hard_coded_references, cast(null as {{ dbt.type_int() }}) as number_lines, cast(null as {{ dbt.type_float() }}) as sql_complexity, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as macro_dependencies, - cast(True as boolean) as is_generic_test, - cast(True as boolean) as is_excluded + cast(True as {{ dbt.type_boolean() }}) as is_generic_test, + cast(True as {{ dbt.type_boolean() }}) as is_excluded from dummy_cte -where false \ No newline at end of file +where false diff --git a/models/staging/graph/stg_sources.sql b/models/staging/graph/stg_sources.sql index 328a3c3..cb2fa92 100644 --- a/models/staging/graph/stg_sources.sql +++ b/models/staging/graph/stg_sources.sql @@ -22,18 +22,18 @@ select cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as alias, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as resource_type, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as source_name, - cast(True as boolean) as is_source_described, - cast(True as boolean) as is_described, - cast(True as boolean) as is_enabled, + cast(True as {{ dbt.type_boolean() }}) as is_source_described, + cast(True as {{ dbt.type_boolean() }}) as is_described, + cast(True as {{ dbt.type_boolean() }}) as is_enabled, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as loaded_at_field, - cast(True as boolean) as is_freshness_enabled, + cast(True as {{ dbt.type_boolean() }}) as is_freshness_enabled, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as database, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as schema, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as package_name, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as loader, cast(null as {{ 
dbt_project_evaluator.type_string_dpe() }}) as identifier, cast(null as {{ dbt_project_evaluator.type_string_dpe() }}) as meta, - cast(True as boolean) as is_excluded + cast(True as {{ dbt.type_boolean() }}) as is_excluded from dummy_cte -where false \ No newline at end of file +where false diff --git a/seeds/seeds.yml b/seeds/seeds.yml index a5a2853..7fb7004 100644 --- a/seeds/seeds.yml +++ b/seeds/seeds.yml @@ -6,10 +6,10 @@ seeds: config: column_types: - fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" - column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" - id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" - comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}" + fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino', 'greenplum'] else 'String' if target.type in ['clickhouse'] else 'string' }}" + column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino', 'greenplum'] else 'String' if target.type in ['clickhouse'] else 'string' }}" + id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino', 'greenplum'] else 'String' if target.type in ['clickhouse'] else 'string' }}" + comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino', 'greenplum'] else 'String' if target.type in ['clickhouse'] else 'string' }}" columns: - name: fct_name