Add support for Trino

This commit is contained in:
Damian Owsianny
2023-09-13 11:51:23 +02:00
parent 9a73300506
commit 4950ce71b2
18 changed files with 67 additions and 28 deletions

View File

@@ -82,6 +82,17 @@ jobs:
- store_artifacts:
path: ./integration_tests/logs
integration-trino:
docker:
- image: cimg/python:3.9.9
steps:
- checkout
- run:
name: "Run Tests - Trino"
command: ./run_test.sh trino
- store_artifacts:
path: ./integration_tests/logs
workflows:
version: 2
test-all:
@@ -104,3 +115,7 @@ workflows:
context: profile-databricks
requires:
- integration-postgres
- integration-trino:
context: profile-trino
requires:
- integration-postgres

View File

@@ -40,6 +40,7 @@ If applicable, add screenshots or log output to help explain your problem.
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] trino/starburst
- [ ] other (specify: ____________)

View File

@@ -28,5 +28,6 @@ Screenshot of passing integration tests locally
- [ ] Snowflake
- [ ] Databricks
- [ ] DuckDB
- [ ] Trino/Starburst
- [ ] I have updated the README.md (if applicable)
- [ ] I have added tests & descriptions to my models (and macros if applicable)

View File

@@ -20,6 +20,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)
## Using This Package

View File

@@ -79,8 +79,8 @@ vars:
other_prefixes: ['rpt_']
# -- Performance variables --
chained_views_threshold: 5
chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}"
# -- Execution variables --
insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else -1 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}"

View File

@@ -21,6 +21,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)
## Using This Package

View File

@@ -58,4 +58,17 @@ integration_tests:
duckdb:
type: duckdb
path: ./duck.db
path: ./duck.db
trino:
type: trino
host: "{{ env_var('TRINO_TEST_HOST') }}"
port: "{{ env_var('TRINO_TEST_PORT') | as_number }}"
method: ldap
user: "{{ env_var('TRINO_TEST_USER') }}"
password: "{{ env_var('TRINO_TEST_PASS') }}"
catalog: "{{ env_var('TRINO_TEST_CATALOG_NAME') }}"
schema: dbt_project_evaluator_integration_tests_trino
threads: 5
session_properties:
query_max_stage_count: 200

View File

@@ -13,10 +13,10 @@ seeds:
- name: test_fct_documentation_coverage
config:
column_types:
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
tags:
- docs
tests:

View File

@@ -10,11 +10,11 @@ seeds:
- name: test_fct_test_coverage
config:
column_types:
test_coverage_pct: float
staging_test_coverage_pct: float
intermediate_test_coverage_pct: float
marts_test_coverage_pct: float
other_test_coverage_pct: float
test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
staging_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
intermediate_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
marts_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
other_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
tests:
- dbt_utils.equality:
name: equality_fct_test_coverage

View File

@@ -120,7 +120,7 @@ all_relationships (
on all_relationships.child_id = direct_relationships.direct_parent_id
{% if var('max_depth_dag') | int > 0 %}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold')%}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.'
) %}
@@ -138,7 +138,7 @@ all_relationships (
-- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs
{% set max_depth = var('max_depth_dag') | int %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.'
) %}
@@ -260,4 +260,11 @@ with direct_relationships as (
{% macro spark__recursive_dag() %}
-- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
{% endmacro %}
{% macro trino__recursive_dag() %}
{#-- Trino adapter override: although Trino supports recursive WITH queries,
-- they are less performant than building the DAG by creating CTEs in a loop
-- and unioning them, so delegate to the loop-based BigQuery implementation --#}
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}

View File

@@ -19,7 +19,7 @@ source_duplicates as (
{{ dbt.listagg(
measure = 'resource_name',
delimiter_text = "', '",
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_names
from sources
group by source_db_location

View File

@@ -39,7 +39,7 @@ model_fanout_agg as (
{{ dbt.listagg(
measure = 'child',
delimiter_text = "', '",
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as leaf_children
from model_fanout
group by 1, 2

View File

@@ -18,7 +18,7 @@ multiple_sources_joined as (
{{ dbt.listagg(
measure='parent',
delimiter_text="', '",
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_parents
from direct_source_relationships
group by 1

View File

@@ -18,7 +18,7 @@ source_fanout as (
{{ dbt.listagg(
measure='child',
delimiter_text="', '",
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as model_children
from direct_source_relationships
group by 1

View File

@@ -20,10 +20,10 @@ conversion as (
final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
round(sum(is_described_model) * 100.0 / count(*), 2) as documentation_coverage_pct,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
{% for model_type in var('model_types') %}
round(
{{ dbt_utils.safe_divide(

View File

@@ -20,7 +20,7 @@ appropriate_prefixes as (
{{ dbt.listagg(
measure='prefix_value',
delimiter_text="', '",
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as appropriate_prefixes
from naming_convention_prefixes
group by model_type

View File

@@ -19,7 +19,7 @@ conversion as (
final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(number_of_tests_on_model) as total_tests,
sum(is_tested_model) as tested_models,
@@ -32,7 +32,7 @@ final as (
) }}
, 2) as {{ model_type }}_test_coverage_pct,
{% endfor %}
round(sum(number_of_tests_on_model) * 1.0 / count(*), 4) as test_to_model_ratio
round(sum(number_of_tests_on_model) * 1.0000 / count(*), 4) as test_to_model_ratio
from test_counts
left join conversion

View File

@@ -6,10 +6,10 @@ seeds:
config:
column_types:
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
columns:
- name: fct_name