Support for redshift 821 (#10448)

* Add breakpoint.

* Move breakpoint.

* Add fix

* Add changelog.

* Avoid sorting for the string case.

* Add unit test.

* Fix test.

* add good unit tests for coverage of sort method.

* add sql format coverage.

* Modify behavior to log a warning and proceed.

* code review comments.

---------

Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
This commit is contained in:
Mila Page
2024-07-22 13:54:46 -07:00
committed by GitHub
parent c668846404
commit 79ad0a3243
3 changed files with 168 additions and 1 deletions

View File

@@ -0,0 +1,7 @@
kind: Fixes
body: Attempt to provide test fixture tables with all values to set types correctly
for comparison with source tables
time: 2024-06-25T17:17:37.514619-07:00
custom:
Author: versusfacit
Issue: "10365"

View File

@@ -35,6 +35,8 @@ from dbt.parser.schemas import (
YamlReader,
)
from dbt.utils import get_pseudo_test_path
from dbt_common.events.functions import fire_event
from dbt_common.events.types import SystemStdErr
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
@@ -389,6 +391,44 @@ class UnitTestParser(YamlReader):
ut_fixture.fixture, self.project.project_name, unit_test_definition.unique_id
)
# sanitize order of input
if ut_fixture.rows and (
ut_fixture.format == UnitTestFormat.Dict or ut_fixture.format == UnitTestFormat.CSV
):
self._promote_first_non_none_row(ut_fixture)
def _promote_first_non_none_row(self, ut_fixture):
    """
    Swap the first fully populated row of ``ut_fixture.rows`` into position 0.

    A row is "fully populated" when none of its values is None. The fixture is
    mutated in place and nothing is returned. When no such row exists the rows
    are left untouched and a SystemStdErr event is fired as a warning.

    Why: databases like Redshift use the first value in a column to determine
    the column type, and a leading None is assumed to be VARCHAR(1). That shows
    up as obscure type mismatch errors centered on a unit test fixture's `expect`.
    See https://github.com/dbt-labs/dbt-redshift/issues/821 for more info.
    """
    # Position of the earliest row without any None value; None when every
    # row contains at least one None (or there are no rows at all).
    first_full_position = next(
        (
            position
            for position, candidate in enumerate(ut_fixture.rows)
            if not any(cell is None for cell in candidate.values())
        ),
        None,
    )

    if first_full_position is not None:
        # Exchange the two rows; the order of the remaining rows is unchanged.
        rows = ut_fixture.rows
        rows[0], rows[first_full_position] = rows[first_full_position], rows[0]
        return

    # Nothing to promote: warn and carry on instead of failing the parse.
    fire_event(
        SystemStdErr(
            bmsg="Unit Test fixtures benefit from having at least one row free of Null values to ensure consistent column types. Failure to meet this recommendation can result in type mismatch errors between unit test source models and `expected` fixtures."
        )
    )
def get_fixture_file_rows(self, fixture_name, project_name, utdef_unique_id):
# find fixture file object and store unit_test_definition unique_id
fixture = self._get_fixture(fixture_name, project_name)

View File

@@ -1,12 +1,15 @@
from unittest import mock
from dbt.artifacts.resources import DependsOn, UnitTestConfig
from dbt.artifacts.resources import DependsOn, UnitTestConfig, UnitTestFormat
from dbt.contracts.graph.nodes import NodeType, UnitTestDefinition
from dbt.contracts.graph.unparsed import UnitTestOutputFixture
from dbt.parser import SchemaParser
from dbt.parser.unit_tests import UnitTestParser
from dbt_common.events.event_manager_client import add_callback_to_manager
from dbt_common.events.types import SystemStdErr
from tests.unit.parser.test_parser import SchemaParserTest, assertEqualNodes
from tests.unit.utils import MockNode
from tests.utils import EventCatcher
UNIT_TEST_MODEL_NOT_FOUND_SOURCE = """
unit_tests:
@@ -79,6 +82,59 @@ unit_tests:
- {a: 1}
"""
# Dict-format `expect` fixture: the first two rows have an empty (null) `id`
# and only the third row has a value in every column.
UNIT_TEST_NONE_ROWS_SORT = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
rows:
- {"id": , "col1": "d"}
- {"id": , "col1": "e"}
- {"id": 6, "col1": "f"}
"""
# CSV-format `expect` fixture with the same data shape: the first two data
# lines leave `id` empty and only the last line fills both columns.
UNIT_TEST_NONE_ROWS_SORT_CSV = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
format: csv
rows: |
id,col1
,d
,e
6,f
"""
# SQL-format `expect` fixture: `rows` is a block of SQL text rather than a
# list of row mappings.
UNIT_TEST_NONE_ROWS_SORT_SQL = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "unit test description"
given: []
expect:
format: sql
rows: |
select null
select 1
"""
# Dict-format `expect` fixture in which every row has an empty (null) `id`,
# so no row is free of null values.
UNIT_TEST_NONE_ROWS_SORT_FAILS = """
unit_tests:
- name: test_my_model_null_handling
model: my_model
description: "this unit test needs one non-None value row"
given: []
expect:
rows:
- {"id": , "col1": "d"}
- {"id": , "col1": "e"}
"""
class UnitTestParserTest(SchemaParserTest):
def setUp(self):
@@ -173,3 +229,67 @@ class UnitTestParserTest(SchemaParserTest):
for unit_test in self.parser.manifest.unit_tests.values():
self.assertEqual(len(unit_test.depends_on.nodes), 1)
self.assertEqual(unit_test.depends_on.nodes[0], "model.snowplow.my_model")
def _assert_fixture_yml_reorders_to_expected_rows(
    self, unit_test_fixture_yml, fixture_expected_field_format, expected_rows
):
    """
    Parse a unit test YAML document and compare the parsed definition with
    one whose `expect` fixture has the given format and rows.

    unit_test_fixture_yml: YAML text declaring a single unit test named
        `test_my_model_null_handling` for `my_model`.
    fixture_expected_field_format: format expected on the parsed `expect` fixture.
    expected_rows: rows expected on the parsed `expect` fixture.
    """
    yaml_block = self.yaml_block_for(unit_test_fixture_yml, "test_my_model.yml")
    unit_test_parser = UnitTestParser(self.parser, yaml_block)
    unit_test_parser.parse()

    manifest = self.parser.manifest
    self.assert_has_manifest_lengths(manifest, nodes=1, unit_tests=1)
    parsed_unit_tests = list(manifest.unit_tests.values())
    actual_definition = parsed_unit_tests[0]

    # Only the `expect` fixture varies between callers; everything else is fixed.
    expected_fixture = UnitTestOutputFixture(
        format=fixture_expected_field_format, rows=expected_rows
    )
    expected_definition = UnitTestDefinition(
        name="test_my_model_null_handling",
        model="my_model",
        resource_type=NodeType.Unit,
        package_name="snowplow",
        path=yaml_block.path.relative_path,
        original_file_path=yaml_block.path.original_file_path,
        unique_id="unit_test.snowplow.my_model.test_my_model_null_handling",
        given=[],
        expect=expected_fixture,
        description="unit test description",
        overrides=None,
        depends_on=DependsOn(nodes=["model.snowplow.my_model"]),
        fqn=["snowplow", "my_model", "test_my_model_null_handling"],
        config=UnitTestConfig(),
        schema="test_schema",
    )
    expected_definition.build_unit_test_checksum()

    assertEqualNodes(actual_definition, expected_definition)
def test_expected_promote_non_none_row_dct(self):
    """Dict rows: the only row without a None trades places with the first row."""
    self._assert_fixture_yml_reorders_to_expected_rows(
        UNIT_TEST_NONE_ROWS_SORT,
        UnitTestFormat.Dict,
        [
            {"id": 6, "col1": "f"},
            {"id": None, "col1": "e"},
            {"id": None, "col1": "d"},
        ],
    )
def test_expected_promote_non_none_row_csv(self):
    """CSV rows: same reordering as the dict case; the `id` value is the string "6"."""
    self._assert_fixture_yml_reorders_to_expected_rows(
        UNIT_TEST_NONE_ROWS_SORT_CSV,
        UnitTestFormat.CSV,
        [
            {"id": "6", "col1": "f"},
            {"id": None, "col1": "e"},
            {"id": None, "col1": "d"},
        ],
    )
def test_expected_promote_non_none_row_sql(self):
    """SQL rows are a single string and are expected back unchanged."""
    sql_lines = ("select null", "select 1")
    self._assert_fixture_yml_reorders_to_expected_rows(
        UNIT_TEST_NONE_ROWS_SORT_SQL,
        UnitTestFormat.SQL,
        "\n".join(sql_lines),
    )
def test_no_full_row_does_not_raise_exception(self):
    """A fixture with no None-free row parses and emits exactly one SystemStdErr event."""
    stderr_catcher = EventCatcher(SystemStdErr)
    # Register the catcher before parsing so the event fired during parse is seen.
    add_callback_to_manager(stderr_catcher.catch)

    yaml_block = self.yaml_block_for(UNIT_TEST_NONE_ROWS_SORT_FAILS, "test_my_model.yml")
    unit_test_parser = UnitTestParser(self.parser, yaml_block)
    unit_test_parser.parse()

    caught_count = len(stderr_catcher.caught_events)
    assert caught_count == 1