Add test_ut_casing.py, works in Snowflake not in Postgres

Modify _get_unit_test_agate_table to handle uppercase ACTUAL_OR_EXPECTED
2024-02-02 16:54:01 -05:00 · 2024-02-02 16:49:56 -05:00
2 changed files with 157 additions and 6 deletions
--- a/core/dbt/task/test.py
+++ b/core/dbt/task/test.py
@@ -226,7 +226,7 @@ class TestRunner(CompileRunner):
        # could eventually be returned directly by materialization
        result = context["load_result"]("main")
        adapter_response = result["response"].to_dict(omit_none=True)
-        table = result["table"]
+        table: agate.Table = result["table"]
        actual = self._get_unit_test_agate_table(table, "actual")
        expected = self._get_unit_test_agate_table(table, "expected")

@@ -320,12 +320,22 @@ class TestRunner(CompileRunner):
    def after_execute(self, result):
        self.print_result_line(result)

-    def _get_unit_test_agate_table(self, result_table, actual_or_expected: str):
-        unit_test_table = result_table.where(
-            lambda row: row["actual_or_expected"] == actual_or_expected
-        )
+    def _get_unit_test_agate_table(
+        self, result_table: agate.Table, actual_or_expected: str
+    ) -> agate.Table:
+        def where_actual_or_expected(row):  # agate Row `in` tests cell values, so check row.keys()
+            if "actual_or_expected" in row.keys():
+                return row["actual_or_expected"] == actual_or_expected
+            elif "ACTUAL_OR_EXPECTED" in row.keys():
+                return row["ACTUAL_OR_EXPECTED"] == actual_or_expected.upper()
+            return False
+
+        unit_test_table = result_table.where(where_actual_or_expected)
        columns = list(unit_test_table.columns.keys())
-        columns.remove("actual_or_expected")
+        if "ACTUAL_OR_EXPECTED" in columns:
+            columns.remove("ACTUAL_OR_EXPECTED")
+        if "actual_or_expected" in columns:
+            columns.remove("actual_or_expected")
        return unit_test_table.select(columns)

    def _get_daff_diff(
--- a/tests/adapter/dbt/tests/adapter/unit_testing/test_ut_casing.py
+++ b/tests/adapter/dbt/tests/adapter/unit_testing/test_ut_casing.py
@@ -0,0 +1,141 @@
+import pytest
+from dbt.tests.util import run_dbt
+
+unit_tests_yml = """
+unit_tests:
+  - name: test_valid_email_address # this is the unique name of the test
+    description: my favorite unit test
+    model: dim_wizards # name of the model I'm unit testing
+    given: # the mock data for your inputs
+      - input: ref('stg_wizards')
+        rows:
+          - {WIZARD_ID: "1", EMAIL: cool@example.com,     EMAIL_TOP_LEVEL_DOMAIN: example.com}
+          - {WIZARD_ID: "2", EMAIL: cool@unknown.com,     EMAIL_TOP_LEVEL_DOMAIN: unknown.com}
+          - {WIZARD_ID: "3", EMAIL: badgmail.com,         EMAIL_TOP_LEVEL_DOMAIN: gmail.com}
+          - {WIZARD_ID: "4", EMAIL: missingdot@gmailcom,  EMAIL_TOP_LEVEL_DOMAIN: gmail.com}
+      - input: ref('top_level_email_domains')
+        rows:
+          - {TLD: example.com}
+          - {TLD: gmail.com}
+      - input: ref('stg_worlds')
+        rows: []
+    expect: # the expected output given the inputs above
+      rows:
+        - {WIZARD_ID: "1", IS_VALID_EMAIL_ADDRESS: true}
+        - {WIZARD_ID: "2", IS_VALID_EMAIL_ADDRESS: false}
+        - {WIZARD_ID: "3", IS_VALID_EMAIL_ADDRESS: false}
+        - {WIZARD_ID: "4", IS_VALID_EMAIL_ADDRESS: false}
+"""
+
+stg_wizards_sql = """
+select
+    1 as wizard_id,
+    'tom' as wizard_name,
+    'cool@example.com' as email,
+    '999-999-9999' as phone_number,
+    1 as world_id,
+    'example.com' as email_top_level_domain
+"""
+
+stg_worlds_sql = """
+select
+    1 as world_id,
+    'Erewhon' as world_name
+"""
+
+top_level_email_domains_seed = """tld
+gmail.com
+yahoo.com
+hocuspocus.com
+dbtlabs.com
+hotmail.com
+"""
+
+dim_wizards_sql = """
+with wizards as (
+
+    select * from {{ ref('stg_wizards') }}
+
+),
+
+worlds as (
+
+    select * from {{ ref('stg_worlds') }}
+
+),
+
+accepted_email_domains as (
+
+    select * from {{ ref('top_level_email_domains') }}
+
+),
+
+check_valid_emails as (
+
+    select  
+        wizards.wizard_id,
+        wizards.wizard_name,
+        wizards.email,
+        wizards.phone_number,
+        wizards.world_id,
+
+		coalesce (regexp_like(
+            wizards.email, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
+        )
+        = true
+        and accepted_email_domains.tld is not null,
+        false) as is_valid_email_address
+
+    from wizards
+    left join accepted_email_domains
+        on wizards.email_top_level_domain = lower(accepted_email_domains.tld)
+
+)
+
+select
+    check_valid_emails.wizard_id,
+    check_valid_emails.wizard_name,
+    check_valid_emails.email,
+    check_valid_emails.is_valid_email_address,
+    check_valid_emails.phone_number,
+    worlds.world_name
+from check_valid_emails
+left join worlds
+    on check_valid_emails.world_id = worlds.world_id
+"""
+
+schema_yml = """
+models:
+  - name: dim_wizards
+    columns:
+      - name: wizard_id 
+      - name: wizard_name
+      - name: email
+      - name: phone_number
+      - name: world_name
+"""
+
+class TestUnitTest:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "dim_wizards.sql": dim_wizards_sql,
+            "stg_wizards.sql": stg_wizards_sql,
+            "stg_worlds.sql": stg_worlds_sql,
+            "schema.yml": schema_yml,
+            "unit_tests.yml": unit_tests_yml,
+        }
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "top_level_email_domains.csv": top_level_email_domains_seed
+        }
+
+    def test_dim_wizards(self, project):
+        results = run_dbt(["seed"])
+        assert len(results) == 1
+        results = run_dbt(["run"])
+        assert len(results) == 3
+
+        results = run_dbt(["test"])
Author	SHA1	Message	Date
Gerda Shank	e71a141097	Add test_ut_casing.py, works in Snowflake not in Postgres	2024-02-02 16:54:01 -05:00
Gerda Shank	5cec497059	Modify _get_unit_test_agate_table to handle uppercase ACTUAL_OR_EXPECTED	2024-02-02 16:49:56 -05:00