Support for redshift 821 (#10448)

* Add breakpoint. * Move breakpoint. * Add fix * Add changelog. * Avoid sorting for the string case. * Add unit test. * Fix test. * add good unit tests for coverage of sort method. * add sql format coverage. * Modify behavior to log a warning and proceed. * code review comments. --------- Co-authored-by: Mila Page <versusfacit@users.noreply.github.com>
2024-07-22 13:54:46 -07:00
parent c668846404
commit 79ad0a3243
3 changed files with 168 additions and 1 deletions
--- a/.changes/unreleased/Fixes-20240625-171737.yaml
+++ b/.changes/unreleased/Fixes-20240625-171737.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Attempt to provide test fixture tables with all values to set types correctly
+  for comparison with source tables
+time: 2024-06-25T17:17:37.514619-07:00
+custom:
+  Author: versusfacit
+  Issue: "10365"
--- a/core/dbt/parser/unit_tests.py
+++ b/core/dbt/parser/unit_tests.py
@@ -35,6 +35,8 @@ from dbt.parser.schemas import (
    YamlReader,
 )
 from dbt.utils import get_pseudo_test_path
+from dbt_common.events.functions import fire_event
+from dbt_common.events.types import SystemStdErr
 from dbt_extractor import ExtractionError, py_extract_from_source  # type: ignore


@@ -389,6 +391,44 @@ class UnitTestParser(YamlReader):
                    ut_fixture.fixture, self.project.project_name, unit_test_definition.unique_id
                )

+        # sanitize order of input
+        if ut_fixture.rows and (
+            ut_fixture.format == UnitTestFormat.Dict or ut_fixture.format == UnitTestFormat.CSV
+        ):
+            self._promote_first_non_none_row(ut_fixture)
+
+    def _promote_first_non_none_row(self, ut_fixture):
+        """
+        Swap the first row with no None values into position 0 of the ut_fixture.rows list.
+
+        This function modifies the ut_fixture object in place. The row previously at
+        position 0 takes the promoted row's old position; all other rows keep their places.
+        If every row contains at least one None, the rows are left unchanged and a
+        SystemStdErr event describing the recommendation is fired instead.
+
+        Needed for databases like Redshift that use the first value in a column to determine
+        the column type. If the first value is None, the type is assumed to be VARCHAR(1).
+        This leads to obscure type mismatch errors centered on a unit test fixture's `expect`.
+        See https://github.com/dbt-labs/dbt-redshift/issues/821 for more info.
+        """
+        non_none_row_index = None
+
+        # Scan rows in order and stop at the first one that is fully populated
+        for index, row in enumerate(ut_fixture.rows):
+            # row.values() requires each row to be a mapping; every value must be non-None
+            if all(value is not None for value in row.values()):
+                non_none_row_index = index
+                break
+
+        if non_none_row_index is None:
+            fire_event(
+                SystemStdErr(
+                    bmsg="Unit Test fixtures benefit from having at least one row free of Null values to ensure consistent column types. Failure to meet this recommendation can result in type mismatch errors between unit test source models and `expected` fixtures."
+                )
+            )
+        else:
+            ut_fixture.rows[0], ut_fixture.rows[non_none_row_index] = (
+                ut_fixture.rows[non_none_row_index],
+                ut_fixture.rows[0],
+            )
+
    def get_fixture_file_rows(self, fixture_name, project_name, utdef_unique_id):
        # find fixture file object and store unit_test_definition unique_id
        fixture = self._get_fixture(fixture_name, project_name)
--- a/tests/unit/parser/test_unit_tests.py
+++ b/tests/unit/parser/test_unit_tests.py
@@ -1,12 +1,15 @@
 from unittest import mock

-from dbt.artifacts.resources import DependsOn, UnitTestConfig
+from dbt.artifacts.resources import DependsOn, UnitTestConfig, UnitTestFormat
 from dbt.contracts.graph.nodes import NodeType, UnitTestDefinition
 from dbt.contracts.graph.unparsed import UnitTestOutputFixture
 from dbt.parser import SchemaParser
 from dbt.parser.unit_tests import UnitTestParser
+from dbt_common.events.event_manager_client import add_callback_to_manager
+from dbt_common.events.types import SystemStdErr
 from tests.unit.parser.test_parser import SchemaParserTest, assertEqualNodes
 from tests.unit.utils import MockNode
+from tests.utils import EventCatcher

 UNIT_TEST_MODEL_NOT_FOUND_SOURCE = """
 unit_tests:
@@ -79,6 +82,59 @@ unit_tests:
          - {a: 1}
 """

+# Inline (dict) `expect` rows: the first two rows leave `id` empty, only the last row has every value.
+UNIT_TEST_NONE_ROWS_SORT = """
+unit_tests:
+  - name: test_my_model_null_handling
+    model: my_model
+    description: "unit test description"
+    given: []
+    expect:
+        rows:
+        - {"id":  , "col1": "d"}
+        - {"id":  , "col1": "e"}
+        - {"id": 6, "col1": "f"}
+"""
+
+# Same data as above, written as an inline CSV fixture (`format: csv`).
+UNIT_TEST_NONE_ROWS_SORT_CSV = """
+unit_tests:
+  - name: test_my_model_null_handling
+    model: my_model
+    description: "unit test description"
+    given: []
+    expect:
+        format: csv
+        rows: |
+          id,col1
+          ,d
+          ,e
+          6,f
+"""
+
+# SQL-format fixture: `rows` is a block of text rather than a list of row mappings.
+UNIT_TEST_NONE_ROWS_SORT_SQL = """
+unit_tests:
+  - name: test_my_model_null_handling
+    model: my_model
+    description: "unit test description"
+    given: []
+    expect:
+        format: sql
+        rows: |
+          select null
+          select 1
+"""
+
+# Dict rows where every row leaves `id` empty, so no row is free of empty values.
+UNIT_TEST_NONE_ROWS_SORT_FAILS = """
+unit_tests:
+  - name: test_my_model_null_handling
+    model: my_model
+    description: "this unit test needs one non-None value row"
+    given: []
+    expect:
+        rows:
+        - {"id":  , "col1": "d"}
+        - {"id":  , "col1": "e"}
+"""
+

 class UnitTestParserTest(SchemaParserTest):
    def setUp(self):
@@ -173,3 +229,67 @@ class UnitTestParserTest(SchemaParserTest):
        for unit_test in self.parser.manifest.unit_tests.values():
            self.assertEqual(len(unit_test.depends_on.nodes), 1)
            self.assertEqual(unit_test.depends_on.nodes[0], "model.snowplow.my_model")
+
+    def _assert_fixture_yml_reorders_to_expected_rows(
+        self, unit_test_fixture_yml, fixture_expected_field_format, expected_rows
+    ):
+        """
+        Parse a unit test YAML document and check the resulting `expect` fixture.
+
+        unit_test_fixture_yml: YAML text defining a single unit test named
+            test_my_model_null_handling for model my_model.
+        fixture_expected_field_format: format the parsed `expect` fixture should carry.
+        expected_rows: rows the parsed `expect` fixture should hold after parsing.
+
+        Asserts that the manifest ends up with one node and one unit test, and that the
+        parsed unit test equals a UnitTestDefinition built from the arguments above.
+        """
+        block = self.yaml_block_for(unit_test_fixture_yml, "test_my_model.yml")
+
+        UnitTestParser(self.parser, block).parse()
+
+        self.assert_has_manifest_lengths(self.parser.manifest, nodes=1, unit_tests=1)
+        unit_test = list(self.parser.manifest.unit_tests.values())[0]
+        expected = UnitTestDefinition(
+            name="test_my_model_null_handling",
+            model="my_model",
+            resource_type=NodeType.Unit,
+            package_name="snowplow",
+            path=block.path.relative_path,
+            original_file_path=block.path.original_file_path,
+            unique_id="unit_test.snowplow.my_model.test_my_model_null_handling",
+            given=[],
+            expect=UnitTestOutputFixture(format=fixture_expected_field_format, rows=expected_rows),
+            description="unit test description",
+            overrides=None,
+            depends_on=DependsOn(nodes=["model.snowplow.my_model"]),
+            fqn=["snowplow", "my_model", "test_my_model_null_handling"],
+            config=UnitTestConfig(),
+            schema="test_schema",
+        )
+        # The checksum is built on the expected definition before the two are compared
+        expected.build_unit_test_checksum()
+        assertEqualNodes(unit_test, expected)
+
+    def test_expected_promote_non_none_row_dct(self):
+        # The YAML lists rows d, e, f; the fully populated row f is expected to trade
+        # places with the first row, giving f, e, d.
+        expected_rows = [
+            {"id": 6, "col1": "f"},
+            {"id": None, "col1": "e"},
+            {"id": None, "col1": "d"},
+        ]
+        self._assert_fixture_yml_reorders_to_expected_rows(
+            UNIT_TEST_NONE_ROWS_SORT, UnitTestFormat.Dict, expected_rows
+        )
+
+    def test_expected_promote_non_none_row_csv(self):
+        # Same reordering as the dict case, but for the CSV fixture: empty cells are
+        # expected as None and the populated id is expected as the string "6".
+        expected_rows = [
+            {"id": "6", "col1": "f"},
+            {"id": None, "col1": "e"},
+            {"id": None, "col1": "d"},
+        ]
+        self._assert_fixture_yml_reorders_to_expected_rows(
+            UNIT_TEST_NONE_ROWS_SORT_CSV, UnitTestFormat.CSV, expected_rows
+        )
+
+    def test_expected_promote_non_none_row_sql(self):
+        # SQL fixtures are expected to come through as the same text, in the same order.
+        expected_rows = "select null\n" + "select 1"
+        self._assert_fixture_yml_reorders_to_expected_rows(
+            UNIT_TEST_NONE_ROWS_SORT_SQL, UnitTestFormat.SQL, expected_rows
+        )
+
+    def test_no_full_row_does_not_raise_exception(self):
+        # Capture SystemStdErr events fired while parsing.
+        # NOTE(review): the callback is never removed here - confirm the event manager
+        # is reset between tests so it cannot leak into other test cases.
+        catcher = EventCatcher(SystemStdErr)
+        add_callback_to_manager(catcher.catch)
+
+        block = self.yaml_block_for(UNIT_TEST_NONE_ROWS_SORT_FAILS, "test_my_model.yml")
+        # Parsing must complete without raising even though no row is fully populated
+        UnitTestParser(self.parser, block).parse()
+
+        # Exactly one SystemStdErr event is expected for the single fixture
+        assert len(catcher.caught_events) == 1