Compare commits

...

2 Commits

Author SHA1 Message Date
Gerda Shank
e71a141097 Add test_ut_casing.py, works in Snowflake not in Postgres 2024-02-02 16:54:01 -05:00
Gerda Shank
5cec497059 Modify _get_unit_test_agate_table to handle uppercase ACTUAL_OR_EXPECTED 2024-02-02 16:49:56 -05:00
2 changed files with 157 additions and 6 deletions

View File

@@ -226,7 +226,7 @@ class TestRunner(CompileRunner):
# could eventually be returned directly by materialization
result = context["load_result"]("main")
adapter_response = result["response"].to_dict(omit_none=True)
table = result["table"]
table: agate.Table = result["table"]
actual = self._get_unit_test_agate_table(table, "actual")
expected = self._get_unit_test_agate_table(table, "expected")
@@ -320,12 +320,22 @@ class TestRunner(CompileRunner):
def after_execute(self, result):
    """Print the result line for ``result`` via ``print_result_line``."""
    self.print_result_line(result)
def _get_unit_test_agate_table(
    self, result_table: agate.Table, actual_or_expected: str
) -> agate.Table:
    """Return the rows of ``result_table`` tagged ``actual_or_expected``.

    The result table carries a marker column that tags every row; the
    column name may arrive lower-cased (``actual_or_expected``) or
    upper-cased (``ACTUAL_OR_EXPECTED``). The matching rows are returned
    with the marker column dropped.

    Args:
        result_table: Table holding the tagged rows.
        actual_or_expected: Tag to keep, e.g. ``"actual"`` or ``"expected"``.

    Returns:
        A table with only the matching rows and without the marker column.
        If neither spelling of the marker column exists, the returned table
        has no rows and keeps every column.
    """
    marker_names = ("actual_or_expected", "ACTUAL_OR_EXPECTED")

    # Resolve the marker column once from the table's column names. Testing
    # `name in row` would be wrong here: agate rows iterate over their cell
    # *values*, so `in` never looks at the column names.
    column_names = list(result_table.columns.keys())
    if "actual_or_expected" in column_names:
        marker = "actual_or_expected"
        wanted = actual_or_expected
    elif "ACTUAL_OR_EXPECTED" in column_names:
        marker = "ACTUAL_OR_EXPECTED"
        # NOTE(review): the tag is upper-cased to match an upper-cased column
        # name; confirm the adapter upper-cases the cell values as well.
        wanted = actual_or_expected.upper()
    else:
        marker = None
        wanted = None

    def where_actual_or_expected(row) -> bool:
        if marker is None:
            return False
        return row[marker] == wanted

    unit_test_table = result_table.where(where_actual_or_expected)
    # Drop whichever spelling of the marker column is present; never assume
    # the lower-case one exists (list.remove would raise ValueError).
    columns = [name for name in column_names if name not in marker_names]
    return unit_test_table.select(columns)
def _get_daff_diff(

View File

@@ -0,0 +1,141 @@
import pytest
from dbt.tests.util import run_dbt
# Unit test definition for the dim_wizards model. The mocked input rows and
# the expected rows spell their column names in upper case (WIZARD_ID, EMAIL,
# TLD, ...) while the model SQL in this file uses lower-case identifiers --
# presumably to exercise column-name casing; confirm against the test's intent.
unit_tests_yml = """
unit_tests:
- name: test_valid_email_address # this is the unique name of the test
description: my favorite unit test
model: dim_wizards # name of the model I'm unit testing
given: # the mock data for your inputs
- input: ref('stg_wizards')
rows:
- {WIZARD_ID: "1", EMAIL: cool@example.com, EMAIL_TOP_LEVEL_DOMAIN: example.com}
- {WIZARD_ID: "2", EMAIL: cool@unknown.com, EMAIL_TOP_LEVEL_DOMAIN: unknown.com}
- {WIZARD_ID: "3", EMAIL: badgmail.com, EMAIL_TOP_LEVEL_DOMAIN: gmail.com}
- {WIZARD_ID: "4", EMAIL: missingdot@gmailcom, EMAIL_TOP_LEVEL_DOMAIN: gmail.com}
- input: ref('top_level_email_domains')
rows:
- {TLD: example.com}
- {TLD: gmail.com}
- input: ref('stg_worlds')
rows: []
expect: # the expected output given the inputs above
rows:
- {WIZARD_ID: "1", IS_VALID_EMAIL_ADDRESS: true}
- {WIZARD_ID: "2", IS_VALID_EMAIL_ADDRESS: false}
- {WIZARD_ID: "3", IS_VALID_EMAIL_ADDRESS: false}
- {WIZARD_ID: "4", IS_VALID_EMAIL_ADDRESS: false}
"""
# Staging model that selects a single hard-coded wizard row; dim_wizards
# reads it through ref('stg_wizards').
stg_wizards_sql = """
select
1 as wizard_id,
'tom' as wizard_name,
'cool@example.com' as email,
'999-999-9999' as phone_number,
1 as world_id,
'example.com' as email_top_level_domain
"""
# Staging model that selects a single hard-coded world row; dim_wizards
# left-joins it on world_id to obtain world_name.
stg_worlds_sql = """
select
1 as world_id,
'Erewhon' as world_name
"""
# CSV seed content: a `tld` header followed by five domains. The header must
# stay on the first line, so the literal starts right after the opening quotes.
top_level_email_domains_seed = """tld
gmail.com
yahoo.com
hocuspocus.com
dbtlabs.com
hotmail.com
"""
# Model under test: joins wizards to the accepted e-mail domains and to
# worlds, and derives is_valid_email_address from a regexp_like() match plus
# the presence of a matching top-level domain.
# Raw string: the regex contains `\.`, which is an invalid escape sequence in
# a regular (non-raw) Python string literal; the runtime value is unchanged.
dim_wizards_sql = r"""
with wizards as (
select * from {{ ref('stg_wizards') }}
),
worlds as (
select * from {{ ref('stg_worlds') }}
),
accepted_email_domains as (
select * from {{ ref('top_level_email_domains') }}
),
check_valid_emails as (
select
wizards.wizard_id,
wizards.wizard_name,
wizards.email,
wizards.phone_number,
wizards.world_id,
coalesce (regexp_like(
wizards.email, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
)
= true
and accepted_email_domains.tld is not null,
false) as is_valid_email_address
from wizards
left join accepted_email_domains
on wizards.email_top_level_domain = lower(accepted_email_domains.tld)
)
select
check_valid_emails.wizard_id,
check_valid_emails.wizard_name,
check_valid_emails.email,
check_valid_emails.is_valid_email_address,
check_valid_emails.phone_number,
worlds.world_name
from check_valid_emails
left join worlds
on check_valid_emails.world_id = worlds.world_id
"""
# Properties file for the dim_wizards model. It lists five columns; the
# is_valid_email_address column that the model also selects is not listed.
schema_yml = """
models:
- name: dim_wizards
columns:
- name: wizard_id
- name: wizard_name
- name: email
- name: phone_number
- name: world_name
"""
class TestUnitTest:
    """Runs dbt seed, run and test against the wizard fixtures in this file."""

    @pytest.fixture(scope="class")
    def models(self):
        """Map each model/YAML file name to its contents."""
        project_models = {}
        project_models["dim_wizards.sql"] = dim_wizards_sql
        project_models["stg_wizards.sql"] = stg_wizards_sql
        project_models["stg_worlds.sql"] = stg_worlds_sql
        project_models["schema.yml"] = schema_yml
        project_models["unit_tests.yml"] = unit_tests_yml
        return project_models

    @pytest.fixture(scope="class")
    def seeds(self):
        """Map the single seed file name to its CSV contents."""
        seed_files = {"top_level_email_domains.csv": top_level_email_domains_seed}
        return seed_files

    def test_dim_wizards(self, project):
        """Seed, build and test the project, checking the result counts."""
        # `project` is not used directly; presumably a fixture that sets up
        # the dbt project -- confirm against the test framework.
        seeded = run_dbt(["seed"])
        assert len(seeded) == 1

        built = run_dbt(["run"])
        assert len(built) == 3

        # NOTE(review): the outcome of the final `dbt test` invocation is not
        # asserted in the lines visible here -- confirm whether checks follow.
        results = run_dbt(["test"])