import sys
from copy import copy
from dataclasses import dataclass, field
import uuid
import pytest
from typing import (
    ClassVar,
    Final,
    Generic,
    Sequence,
    Mapping,
    Dict,
    MutableMapping,
    MutableSequence,
    TypeVar,
    Union,
    Optional,
    List,
    Any,
)
from typing_extensions import Annotated, get_args, get_origin
from enum import Enum

from datetime import datetime, date, time  # noqa: I251
from dlt.common import Decimal
from dlt.common import json
from dlt.common.schema.typing import TColumnType

from dlt.common.libs.pydantic import (
    DltConfig,
    pydantic_to_table_schema_columns,
    apply_schema_contract_to_model,
    validate_and_filter_item,
    validate_and_filter_items,
    create_list_model,
)
from pydantic import UUID4, BaseModel, Json, AnyHttpUrl, ConfigDict, ValidationError

from dlt.common.schema.exceptions import DataValidationError


class StrEnum(str, Enum):
    a = "a_value"
    b = "b_value"
    c = "c_value"


class IntEnum(int, Enum):
    a = 0
    b = 1
    c = 2


class MixedEnum(Enum):
    a_int = 0
    b_str = "b_value"
    c_int = 2


class NestedModel(BaseModel):
    nested_field: str


class Model(BaseModel):
    bigint_field: int
    text_field: str
    timestamp_field: datetime
    date_field: date
    decimal_field: Decimal
    double_field: float
    time_field: time

    nested_field: NestedModel
    list_field: List[str]

    union_field: Union[int, str]

    optional_field: Optional[float]

    blank_dict_field: dict  # type: ignore[type-arg]
    parametrized_dict_field: Dict[str, int]

    str_enum_field: StrEnum
    int_enum_field: IntEnum
    # Both of these should coerce to str
    mixed_enum_int_field: MixedEnum
    mixed_enum_str_field: MixedEnum

    json_field: Json[List[str]]

    url_field: AnyHttpUrl

    any_field: Any
    json_any_field: Json[Any]


class ModelWithConfig(Model):
    model_config = ConfigDict(frozen=True, extra="allow")


TEST_MODEL_INSTANCE = Model(
    bigint_field=1,
    text_field="text",
    timestamp_field=datetime.now(),
    date_field=date.today(),
    decimal_field=Decimal(1.1),
    double_field=1.1,
    time_field=time(1, 2, 3, 12345),
    nested_field=NestedModel(nested_field="nested"),
    list_field=["a", "b", "c"],
    union_field=1,
    optional_field=None,
    blank_dict_field={},
    parametrized_dict_field={"a": 1, "b": 2, "c": 3},
    str_enum_field=StrEnum.a,
    int_enum_field=IntEnum.a,
    mixed_enum_int_field=MixedEnum.a_int,
    mixed_enum_str_field=MixedEnum.b_str,
    json_field=json.dumps(["a", "b", "c"]),  # type: ignore[arg-type]
    url_field="https://example.com",  # type: ignore[arg-type]
    any_field="any_string",
    json_any_field=json.dumps("any_string"),
)


class BookGenre(str, Enum):
    scifi = "scifi"
    action = "action"
    thriller = "thriller"


@dataclass
class BookInfo:
    isbn: Optional[str] = field(default="ISBN")
    author: Optional[str] = field(default="Charles Bukowski")


class UserLabel(BaseModel):
    label: str


class UserAddress(BaseModel):
    street: str
    zip_code: Sequence[int]
    label: Optional[UserLabel]
    ro_labels: Mapping[str, UserLabel]
    wr_labels: MutableMapping[str, List[UserLabel]]
    ro_list: Sequence[UserLabel]
    wr_list: MutableSequence[Dict[str, UserLabel]]


class User(BaseModel):
    user_id: int
    account_id: UUID4
    optional_uuid: Optional[UUID4]
    name: Annotated[str, "PII", "name"]
    favorite_book: Annotated[Union[Annotated[BookInfo, "meta"], BookGenre, None], "union metadata"]
    created_at: Optional[datetime]
    labels: List[str]
    user_label: UserLabel
    user_labels: List[UserLabel]
    address: Annotated[UserAddress, "PII", "address"]
    uuid_or_str: Union[str, UUID4, None]
    unity: Union[UserAddress, UserLabel, Dict[str, UserAddress]]
    # NOTE: added "int" because this type was clashing with a type
    # in a delta-rs library that got cached and that re-orders the union
    location: Annotated[Optional[Union[str, List[str], int]], None]
    something_required: Annotated[Union[str, int], type(None)]
    final_location: Final[Annotated[Union[str, int], None]]  # type: ignore[misc]
    final_optional: Final[Annotated[Optional[str], None]]  # type: ignore[misc]

    dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}


USER_INSTANCE_DATA = dict(
    user_id=1,
    account_id=uuid.uuid4(),
    optional_uuid=None,
    favorite_book=BookInfo(isbn="isbn-xyz", author="author"),
    name="random name",
    created_at=datetime.now(),
    labels=["str"],
    user_label=dict(label="123"),
    user_labels=[
        dict(label="123"),
    ],
    address=dict(
        street="random street",
        zip_code=[1234566, 4567789],
        label=dict(label="123"),
        ro_labels={
            "x": dict(label="123"),
        },
        wr_labels={
            "y": [
                dict(label="123"),
            ]
        },
        ro_list=[
            dict(label="123"),
        ],
        wr_list=[
            {
                "x": dict(label="123"),
            }
        ],
    ),
    unity=dict(label="123"),
    uuid_or_str=uuid.uuid4(),
    location="Florida keys",
    final_location="Ginnie Springs",
    something_required=123,
    final_optional=None,
)


@pytest.mark.parametrize("instance", [True, False])
|
|
def test_pydantic_model_to_columns(instance: bool) -> None:
|
|
if instance:
|
|
model = TEST_MODEL_INSTANCE
|
|
else:
|
|
model = Model # type: ignore[assignment]
|
|
|
|
result = pydantic_to_table_schema_columns(model)
|
|
|
|
assert result["bigint_field"]["data_type"] == "bigint"
|
|
assert result["text_field"]["data_type"] == "text"
|
|
assert result["timestamp_field"]["data_type"] == "timestamp"
|
|
assert result["date_field"]["data_type"] == "date"
|
|
assert result["decimal_field"]["data_type"] == "decimal"
|
|
assert result["double_field"]["data_type"] == "double"
|
|
assert result["time_field"]["data_type"] == "time"
|
|
assert result["nested_field"]["data_type"] == "json"
|
|
assert result["list_field"]["data_type"] == "json"
|
|
assert result["union_field"]["data_type"] == "bigint"
|
|
assert result["optional_field"]["data_type"] == "double"
|
|
assert result["optional_field"]["nullable"] is True
|
|
assert result["blank_dict_field"]["data_type"] == "json"
|
|
assert result["parametrized_dict_field"]["data_type"] == "json"
|
|
assert result["str_enum_field"]["data_type"] == "text"
|
|
assert result["int_enum_field"]["data_type"] == "bigint"
|
|
assert result["mixed_enum_int_field"]["data_type"] == "text"
|
|
assert result["mixed_enum_str_field"]["data_type"] == "text"
|
|
assert result["json_field"]["data_type"] == "json"
|
|
assert result["url_field"]["data_type"] == "text"
|
|
|
|
# Any type fields are excluded from schema
|
|
assert "any_field" not in result
|
|
assert "json_any_field" not in result
|
|
|
|
|
|
def test_pydantic_model_to_columns_annotated() -> None:
|
|
# We need to check if pydantic_to_table_schema_columns is idempotent
|
|
# and can generate the same schema from the class and from the class instance.
|
|
schema_from_user_class = pydantic_to_table_schema_columns(User)
|
|
schema_from_user_instance = pydantic_to_table_schema_columns(User(**USER_INSTANCE_DATA)) # type: ignore
|
|
assert schema_from_user_class == schema_from_user_instance
|
|
assert schema_from_user_class["location"]["nullable"] is True
|
|
assert schema_from_user_class["final_location"]["nullable"] is False
|
|
assert schema_from_user_class["something_required"]["nullable"] is False
|
|
assert schema_from_user_class["final_optional"]["nullable"] is True
|
|
|
|
|
|
def test_pydantic_model_skip_nested_types() -> None:
    class SkipNestedModel(Model):
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    result = pydantic_to_table_schema_columns(SkipNestedModel)

    assert result["bigint_field"]["data_type"] == "bigint"
    assert "nested_field" not in result
    assert "list_field" not in result
    assert "blank_dict_field" not in result
    assert "parametrized_dict_field" not in result
    assert "json_field" not in result
    assert result["bigint_field"]["data_type"] == "bigint"
    assert result["text_field"]["data_type"] == "text"
    assert result["timestamp_field"]["data_type"] == "timestamp"


def test_model_for_column_mode() -> None:
    # extra prop
    instance_extra = TEST_MODEL_INSTANCE.dict()
    instance_extra["extra_prop"] = "EXTRA"
    # back to string
    instance_extra["json_field"] = json.dumps(["a", "b", "c"])
    instance_extra["json_any_field"] = json.dumps("any_string")

    # evolve - allow extra fields
    model_evolve = apply_schema_contract_to_model(ModelWithConfig, "evolve")
    # assert "frozen" in model_evolve.model_config
    extra_instance = model_evolve.parse_obj(instance_extra)
    assert hasattr(extra_instance, "extra_prop")
    assert extra_instance.extra_prop == "EXTRA"
    model_evolve = apply_schema_contract_to_model(Model, "evolve")  # type: ignore[arg-type]
    extra_instance = model_evolve.parse_obj(instance_extra)
    assert extra_instance.extra_prop == "EXTRA"  # type: ignore[attr-defined]

    # freeze - validation error on extra fields
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "freeze")
    # assert "frozen" in model_freeze.model_config
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)
    model_freeze = apply_schema_contract_to_model(Model, "freeze")  # type: ignore[arg-type]
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)

    # discard row - same as freeze
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "discard_row")
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)

    # discard value - ignore extra fields
    model_discard = apply_schema_contract_to_model(ModelWithConfig, "discard_value")
    extra_instance = model_discard.parse_obj(instance_extra)
    assert not hasattr(extra_instance, "extra_prop")
    model_evolve = apply_schema_contract_to_model(Model, "evolve")  # type: ignore[arg-type]
    extra_instance = model_discard.parse_obj(instance_extra)
    assert not hasattr(extra_instance, "extra_prop")

    # evolve data but freeze new columns
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "freeze")
    instance_extra_2 = copy(instance_extra)
    # should parse ok
    model_discard.parse_obj(instance_extra_2)
    # this must fail validation
    instance_extra_2["bigint_field"] = "NOT INT"
    with pytest.raises(ValidationError):
        model_discard.parse_obj(instance_extra_2)
    # let the datatypes evolve
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "evolve")
    print(model_freeze.parse_obj(instance_extra_2).dict())

    with pytest.raises(NotImplementedError):
        apply_schema_contract_to_model(ModelWithConfig, "evolve", "discard_value")


def test_nested_model_config_propagation() -> None:
    # TODO: finish writing this test
    model_freeze = apply_schema_contract_to_model(User, "evolve", "freeze")
    from typing import get_type_hints

    # print(model_freeze.__fields__)
    # extra is modified
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
    assert type_origin is Annotated
    # UserAddress is converted to UserAddressAllow only once
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]

    # print(User.__fields__)
    # print(User.__fields__["name"].annotation)
    # print(model_freeze.model_config)
    # print(model_freeze.__fields__)
    # print(model_freeze.__fields__["name"].annotation)
    # print(model_freeze.__fields__["address"].annotation)


@pytest.mark.skipif(sys.version_info < (3, 10), reason="Runs only on Python 3.10 and later")
def test_nested_model_config_propagation_optional_with_pipe():
    """We would like to test that using Optional and the new | syntax works as expected
    when generating a schema; thus two versions of the user model are defined and both
    instantiated, then we generate the schema for both and check that the results are the same.
    """

    class UserLabelPipe(BaseModel):
        label: str

    class UserAddressPipe(BaseModel):
        street: str
        zip_code: Sequence[int]
        label: UserLabelPipe | None  # type: ignore[misc, syntax, unused-ignore]
        ro_labels: Mapping[str, UserLabelPipe]
        wr_labels: MutableMapping[str, List[UserLabelPipe]]
        ro_list: Sequence[UserLabelPipe]
        wr_list: MutableSequence[Dict[str, UserLabelPipe]]

    class UserPipe(BaseModel):
        user_id: int
        name: Annotated[str, "PII", "name"]
        created_at: datetime | None  # type: ignore[misc, syntax, unused-ignore]
        labels: List[str]
        user_label: UserLabelPipe
        user_labels: List[UserLabelPipe]
        address: Annotated[UserAddressPipe, "PII", "address"]
        unity: Union[UserAddressPipe, UserLabelPipe, Dict[str, UserAddressPipe]]
        location: Annotated[Union[str, List[str]] | None, None]  # type: ignore[misc, syntax, unused-ignore]
        something_required: Annotated[Union[str, int], type(None)]
        final_location: Final[Annotated[Union[str, int], None]]  # type: ignore[misc, syntax, unused-ignore]
        final_optional: Final[Annotated[str | None, None]]  # type: ignore[misc, syntax, unused-ignore]

        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    # TODO: move to separate test
    model_freeze = apply_schema_contract_to_model(UserPipe, "evolve", "freeze")
    from typing import get_type_hints

    # print(model_freeze.__fields__)
    # extra is modified
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressPipeExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
    assert type_origin is Annotated
    # UserAddress is converted to UserAddressAllow only once
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]

    # We need to check if pydantic_to_table_schema_columns is idempotent
    # and can generate the same schema from the class and from the class instance.

    user = UserPipe(**USER_INSTANCE_DATA)  # type: ignore
    schema_from_user_class = pydantic_to_table_schema_columns(UserPipe)
    schema_from_user_instance = pydantic_to_table_schema_columns(user)
    assert schema_from_user_class == schema_from_user_instance
    assert schema_from_user_class["location"]["nullable"] is True
    assert schema_from_user_class["final_location"]["nullable"] is False
    assert schema_from_user_class["something_required"]["nullable"] is False
    assert schema_from_user_class["final_optional"]["nullable"] is True


def test_item_list_validation() -> None:
    class ItemModel(BaseModel):
        b: bool
        opt: Optional[int] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    # non validating items removed from the list (both extra and declared)
    discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")
    discard_list_model = create_list_model(discard_model)
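    # create_list_model wraps the contract-adjusted item model into a list-level model, so a
    # whole batch of records can be validated in one pass with validate_and_filter_items below.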
    # violate data type
    items = validate_and_filter_items(
        "items",
        discard_list_model,
        [{"b": True}, {"b": 2, "opt": "not int", "extra": 1.2}, {"b": 3}, {"b": False}],
        "discard_row",
        "discard_row",
    )
    # {"b": 2, "opt": "not int", "extra": 1.2} - note that this will generate 3 errors for the same item
    # and is crucial in our tests when discarding rows
    assert len(items) == 2
    assert items[0].b is True
    assert items[1].b is False
    # violate extra field
    items = validate_and_filter_items(
        "items",
        discard_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "discard_row",
        "discard_row",
    )
    assert len(items) == 1
    assert items[0].b is True

    # freeze on non validating items (both extra and declared)
    freeze_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze")
    freeze_list_model = create_list_model(freeze_model)
    # violate data type
    with pytest.raises(DataValidationError) as val_ex:
        validate_and_filter_items(
            "items",
            freeze_list_model,
            [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
            "freeze",
            "freeze",
        )
    assert val_ex.value.schema_name is None
    assert val_ex.value.table_name == "items"
    assert val_ex.value.column_name == str(("items", 1, "b"))  # pydantic location
    assert val_ex.value.schema_entity == "data_type"
    assert val_ex.value.contract_mode == "freeze"
    assert val_ex.value.table_schema is freeze_list_model
    assert val_ex.value.data_item == {"b": 2}
    # extra type
    with pytest.raises(DataValidationError) as val_ex:
        validate_and_filter_items(
            "items",
            freeze_list_model,
            [{"b": True}, {"a": 2, "b": False}, {"b": 3}, {"b": False}],
            "freeze",
            "freeze",
        )
    assert val_ex.value.schema_name is None
    assert val_ex.value.table_name == "items"
    assert val_ex.value.column_name == str(("items", 1, "a"))  # pydantic location
    assert val_ex.value.schema_entity == "columns"
    assert val_ex.value.contract_mode == "freeze"
    assert val_ex.value.table_schema is freeze_list_model
    assert val_ex.value.data_item == {"a": 2, "b": False}

    # discard values
    discard_value_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze")
    discard_list_model = create_list_model(discard_value_model)
    # violate extra field
    items = validate_and_filter_items(
        "items",
        discard_list_model,
        [{"b": True}, {"b": False, "a": False}],
        "discard_value",
        "freeze",
    )
    assert len(items) == 2
    # "a" extra got removed
    assert items[1].dict() == {"b": False, "opt": None}
    # violate data type
    with pytest.raises(NotImplementedError):
        apply_schema_contract_to_model(ItemModel, "discard_value", "discard_value")

    # evolve data types and extras
    evolve_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve")
    evolve_list_model = create_list_model(evolve_model)
    # for data types a lenient model will be created that accepts any type
    items = validate_and_filter_items(
        "items",
        evolve_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
        "evolve",
        "evolve",
    )
    assert len(items) == 4
    assert items[0].b is True
    assert items[1].b == 2
    # extra fields allowed
    items = validate_and_filter_items(
        "items",
        evolve_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "evolve",
        "evolve",
    )
    assert len(items) == 4
    assert items[3].b is False
    assert items[3].a is False  # type: ignore[attr-defined]

    # accept new types but discard new columns
    mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve")
    mixed_list_model = create_list_model(mixed_model)
    # for data types a lenient model will be created that accepts any type
    items = validate_and_filter_items(
        "items",
        mixed_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
        "discard_row",
        "evolve",
    )
    assert len(items) == 4
    assert items[0].b is True
    assert items[1].b == 2
    # extra fields forbidden - full rows discarded
    items = validate_and_filter_items(
        "items",
        mixed_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "discard_row",
        "evolve",
    )
    assert len(items) == 3


def test_item_validation() -> None:
    class ItemModel(BaseModel):
        b: bool
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    # non validating items removed from the list (both extra and declared)
    discard_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")
    # violate data type
    assert (
        validate_and_filter_item("items", discard_model, {"b": 2}, "discard_row", "discard_row")
        is None
    )
    # violate extra field
    assert (
        validate_and_filter_item(
            "items", discard_model, {"b": False, "a": False}, "discard_row", "discard_row"
        )
        is None
    )

    # freeze on non validating items (both extra and declared)
    freeze_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze")
    # violate data type
    with pytest.raises(DataValidationError) as val_ex:
        validate_and_filter_item("items", freeze_model, {"b": 2}, "freeze", "freeze")
    assert val_ex.value.schema_name is None
    assert val_ex.value.table_name == "items"
    assert val_ex.value.column_name == str(("b",))  # pydantic location
    assert val_ex.value.schema_entity == "data_type"
    assert val_ex.value.contract_mode == "freeze"
    assert val_ex.value.table_schema is freeze_model
    assert val_ex.value.data_item == {"b": 2}
    # extra type
    with pytest.raises(DataValidationError) as val_ex:
        validate_and_filter_item("items", freeze_model, {"a": 2, "b": False}, "freeze", "freeze")
    assert val_ex.value.schema_name is None
    assert val_ex.value.table_name == "items"
    assert val_ex.value.column_name == str(("a",))  # pydantic location
    assert val_ex.value.schema_entity == "columns"
    assert val_ex.value.contract_mode == "freeze"
    assert val_ex.value.table_schema is freeze_model
    assert val_ex.value.data_item == {"a": 2, "b": False}

    # discard values
    discard_value_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze")
    # violate extra field
    item = validate_and_filter_item(
        "items", discard_value_model, {"b": False, "a": False}, "discard_value", "freeze"
    )
    # "a" extra got removed
    assert item.dict() == {"b": False}

    # evolve data types and extras
    evolve_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve")
    # for data types a lenient model will be created that accepts any type
    item = validate_and_filter_item("items", evolve_model, {"b": 2}, "evolve", "evolve")
    assert item.b == 2
    # extra fields allowed
    item = validate_and_filter_item(
        "items", evolve_model, {"b": False, "a": False}, "evolve", "evolve"
    )
    assert item.b is False
    assert item.a is False  # type: ignore[attr-defined]

    # accept new types but discard new columns
    mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve")
    # for data types a lenient model will be created that accepts any type
    item = validate_and_filter_item("items", mixed_model, {"b": 3}, "discard_row", "evolve")
    assert item.b == 3
    # extra fields forbidden - full rows discarded
    assert (
        validate_and_filter_item(
            "items", mixed_model, {"b": False, "a": False}, "discard_row", "evolve"
        )
        is None
    )


class ChildModel(BaseModel):
    child_attribute: str
    optional_child_attribute: Optional[str] = None


class Parent(BaseModel):
    child: ChildModel
    optional_parent_attribute: Optional[str] = None


@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_pydantic_model_flattened_when_skip_nested_types_is_true(config_attr: str):
    class MyParent(Parent):
        dlt_config: ClassVar[DltConfig] = {config_attr: True}  # type: ignore

    schema = pydantic_to_table_schema_columns(MyParent)

    assert schema == {
        "child__child_attribute": {
            "data_type": "text",
            "name": "child__child_attribute",
            "nullable": False,
        },
        "child__optional_child_attribute": {
            "data_type": "text",
            "name": "child__optional_child_attribute",
            "nullable": True,
        },
        "optional_parent_attribute": {
            "data_type": "text",
            "name": "optional_parent_attribute",
            "nullable": True,
        },
    }


@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_considers_model_as_complex_when_skip_nested_types_is_false(config_attr: str):
    class MyParent(Parent):
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {config_attr: False}  # type: ignore

    schema = pydantic_to_table_schema_columns(MyParent)

    assert schema == {
        "child": {"data_type": "json", "name": "child", "nullable": False},
        "data_dictionary": {"data_type": "json", "name": "data_dictionary", "nullable": False},
        "optional_parent_attribute": {
            "data_type": "text",
            "name": "optional_parent_attribute",
            "nullable": True,
        },
    }


def test_considers_dictionary_as_complex_when_skip_nested_types_is_false():
    class MyParent(Parent):
        data_list: List[str] = []
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    schema = pydantic_to_table_schema_columns(MyParent)

    assert schema["data_dictionary"] == {
        "data_type": "json",
        "name": "data_dictionary",
        "nullable": False,
    }

    assert schema["data_list"] == {
        "data_type": "json",
        "name": "data_list",
        "nullable": False,
    }


def test_skip_json_types_when_skip_nested_types_is_true_and_field_is_not_pydantic_model():
    class MyParent(Parent):
        data_list: List[str] = []
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    schema = pydantic_to_table_schema_columns(MyParent)

    assert "data_dictionary" not in schema
    assert "data_list" not in schema


def test_typed_dict_by_python_version():
    """When using TypedDict in pydantic, it should be imported
    from typing_extensions on Python 3.11 and earlier and from typing
    on Python 3.12 and later.
    Here we test that this is properly set up in dlt.
    """

    class MyModel(BaseModel):
        # TColumnType inherits from TypedDict
        column_type: TColumnType

    m = MyModel(column_type={"data_type": "text"})
    assert m.column_type == {"data_type": "text"}

    with pytest.raises(ValidationError):
        m = MyModel(column_type={"data_type": "invalid_type"})  # type: ignore[typeddict-item]