Add support for Trino

This commit is contained in:
Damian Owsianny
2023-09-13 11:51:23 +02:00
parent 9a73300506
commit 4950ce71b2
18 changed files with 67 additions and 28 deletions

View File

@@ -82,6 +82,17 @@ jobs:
- store_artifacts:
path: ./integration_tests/logs
integration-trino:
docker:
- image: cimg/python:3.9.9
steps:
- checkout
- run:
name: "Run Tests - Trino"
command: ./run_test.sh trino
- store_artifacts:
path: ./integration_tests/logs
workflows:
version: 2
test-all:
@@ -104,3 +115,7 @@ workflows:
context: profile-databricks
requires:
- integration-postgres
- integration-trino:
context: profile-trino
requires:
- integration-postgres

View File

@@ -40,6 +40,7 @@ If applicable, add screenshots or log output to help explain your problem.
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] trino/starburst
- [ ] other (specify: ____________)

View File

@@ -28,5 +28,6 @@ Screenshot of passing integration tests locally
- [ ] Snowflake
- [ ] Databricks
- [ ] DuckDB
- [ ] Trino/Starburst
- [ ] I have updated the README.md (if applicable)
- [ ] I have added tests & descriptions to my models (and macros if applicable)

View File

@@ -20,6 +20,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)
## Using This Package

View File

@@ -79,8 +79,8 @@ vars:
other_prefixes: ['rpt_']
# -- Performance variables --
chained_views_threshold: 5
chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}"
# -- Execution variables --
insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else -1 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}"

View File

@@ -21,6 +21,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)
## Using This Package

View File

@@ -58,4 +58,17 @@ integration_tests:
duckdb:
type: duckdb
path: ./duck.db
path: ./duck.db
trino:
type: trino
host: "{{ env_var('TRINO_TEST_HOST') }}"
port: "{{ env_var('TRINO_TEST_PORT') | as_number }}"
method: ldap
user: "{{ env_var('TRINO_TEST_USER') }}"
password: "{{ env_var('TRINO_TEST_PASS') }}"
catalog: "{{ env_var('TRINO_TEST_CATALOG_NAME') }}"
schema: dbt_project_evaluator_integration_tests_trino
threads: 5
session_properties:
query_max_stage_count: 200

View File

@@ -13,10 +13,10 @@ seeds:
- name: test_fct_documentation_coverage
config:
column_types:
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
tags:
- docs
tests:

View File

@@ -10,11 +10,11 @@ seeds:
- name: test_fct_test_coverage
config:
column_types:
test_coverage_pct: float
staging_test_coverage_pct: float
intermediate_test_coverage_pct: float
marts_test_coverage_pct: float
other_test_coverage_pct: float
test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
staging_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
intermediate_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
marts_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
other_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
tests:
- dbt_utils.equality:
name: equality_fct_test_coverage

View File

@@ -120,7 +120,7 @@ all_relationships (
on all_relationships.child_id = direct_relationships.direct_parent_id
{% if var('max_depth_dag') | int > 0 %}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold')%}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.'
) %}
@@ -138,7 +138,7 @@ all_relationships (
-- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs
{% set max_depth = var('max_depth_dag') | int %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater or equal to than chained_views_threshold.'
) %}
@@ -260,4 +260,11 @@ with direct_relationships as (
{% macro spark__recursive_dag() %}
-- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
{% endmacro %}
{% macro trino__recursive_dag() %}
{#-- Trino adapter override: although Trino supports recursive WITH queries,
-- they are less performant than building the DAG by creating CTEs in a loop
-- and unioning them, so delegate to the loop-based BigQuery implementation --#}
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}

View File

@@ -19,7 +19,7 @@ source_duplicates as (
{{ dbt.listagg(
measure = 'resource_name',
delimiter_text = "', '",
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_names
from sources
group by source_db_location

View File

@@ -39,7 +39,7 @@ model_fanout_agg as (
{{ dbt.listagg(
measure = 'child',
delimiter_text = "', '",
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as leaf_children
from model_fanout
group by 1, 2

View File

@@ -18,7 +18,7 @@ multiple_sources_joined as (
{{ dbt.listagg(
measure='parent',
delimiter_text="', '",
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_parents
from direct_source_relationships
group by 1

View File

@@ -18,7 +18,7 @@ source_fanout as (
{{ dbt.listagg(
measure='child',
delimiter_text="', '",
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as model_children
from direct_source_relationships
group by 1

View File

@@ -20,10 +20,10 @@ conversion as (
final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
round(sum(is_described_model) * 100.0 / count(*), 2) as documentation_coverage_pct,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
{% for model_type in var('model_types') %}
round(
{{ dbt_utils.safe_divide(

View File

@@ -20,7 +20,7 @@ appropriate_prefixes as (
{{ dbt.listagg(
measure='prefix_value',
delimiter_text="', '",
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as appropriate_prefixes
from naming_convention_prefixes
group by model_type

View File

@@ -19,7 +19,7 @@ conversion as (
final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(number_of_tests_on_model) as total_tests,
sum(is_tested_model) as tested_models,
@@ -32,7 +32,7 @@ final as (
) }}
, 2) as {{ model_type }}_test_coverage_pct,
{% endfor %}
round(sum(number_of_tests_on_model) * 1.0 / count(*), 4) as test_to_model_ratio
round(sum(number_of_tests_on_model) * 1.0000 / count(*), 4) as test_to_model_ratio
from test_counts
left join conversion

View File

@@ -6,10 +6,10 @@ seeds:
config:
column_types:
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
columns:
- name: fct_name