Files
dlt/tests/libs/test_pydantic.py
David Scharf cbcff925ba drop python 3.8, enable python 3.13, and enable full linting for 3.12 (#2194)
* add python 3.12 linting

* update locked versions to make project installable on py 3.12

* update flake8

* downgrade poetry for all tests relying on python3.8

* drop python 3.8

* enable python3.13

* copy test updates from python3.13 branch

* update locked sentry version

* pin poetry to 1.8.5

* install ibis outside of poetry

* rename to workflows for consistency

* switch to published alpha version of dlt-pendulum for python 3.13

* fix images

* add note to readme
2025-01-12 16:40:41 +01:00

746 lines
27 KiB
Python

import sys
from copy import copy
from dataclasses import dataclass, field
import uuid
import pytest
from typing import (
ClassVar,
Final,
Generic,
Sequence,
Mapping,
Dict,
MutableMapping,
MutableSequence,
TypeVar,
Union,
Optional,
List,
Any,
)
from typing_extensions import Annotated, get_args, get_origin
from enum import Enum
from datetime import datetime, date, time # noqa: I251
from dlt.common import Decimal
from dlt.common import json
from dlt.common.schema.typing import TColumnType
from dlt.common.libs.pydantic import (
DltConfig,
pydantic_to_table_schema_columns,
apply_schema_contract_to_model,
validate_and_filter_item,
validate_and_filter_items,
create_list_model,
)
from pydantic import UUID4, BaseModel, Json, AnyHttpUrl, ConfigDict, ValidationError
from dlt.common.schema.exceptions import DataValidationError
class StrEnum(str, Enum):
    """Enum whose members are also ``str`` instances."""

    a = "a_value"
    b = "b_value"
    c = "c_value"
class IntEnum(int, Enum):
    """Enum whose members are also ``int`` instances."""

    a = 0
    b = 1
    c = 2
class MixedEnum(Enum):
    """Plain enum that mixes ``int`` and ``str`` member values."""

    a_int = 0
    b_str = "b_value"
    c_int = 2
class NestedModel(BaseModel):
    """Single-field model used as the type of ``Model.nested_field``."""

    nested_field: str
class Model(BaseModel):
    """Fixture model with one field per annotation kind checked by the column-schema tests."""

    bigint_field: int
    text_field: str
    timestamp_field: datetime
    date_field: date
    decimal_field: Decimal
    double_field: float
    time_field: time
    nested_field: NestedModel
    list_field: List[str]
    union_field: Union[int, str]
    optional_field: Optional[float]
    blank_dict_field: dict  # type: ignore[type-arg]
    parametrized_dict_field: Dict[str, int]
    str_enum_field: StrEnum
    int_enum_field: IntEnum
    # Both of these should coerce to str
    mixed_enum_int_field: MixedEnum
    mixed_enum_str_field: MixedEnum
    json_field: Json[List[str]]
    url_field: AnyHttpUrl
    any_field: Any
    json_any_field: Json[Any]
class ModelWithConfig(Model):
    """``Model`` variant that declares its own pydantic config (frozen, extra fields allowed)."""

    model_config = ConfigDict(frozen=True, extra="allow")
# Fully populated `Model` instance shared by the tests below.
# Both `Json` fields are given JSON-encoded strings.
TEST_MODEL_INSTANCE = Model(
    bigint_field=1,
    text_field="text",
    timestamp_field=datetime.now(),
    date_field=date.today(),
    decimal_field=Decimal(1.1),
    double_field=1.1,
    time_field=time(1, 2, 3, 12345),
    nested_field=NestedModel(nested_field="nested"),
    list_field=["a", "b", "c"],
    union_field=1,
    optional_field=None,
    blank_dict_field={},
    parametrized_dict_field={"a": 1, "b": 2, "c": 3},
    str_enum_field=StrEnum.a,
    int_enum_field=IntEnum.a,
    mixed_enum_int_field=MixedEnum.a_int,
    mixed_enum_str_field=MixedEnum.b_str,
    json_field=json.dumps(["a", "b", "c"]),  # type: ignore[arg-type]
    url_field="https://example.com",  # type: ignore[arg-type]
    any_field="any_string",
    json_any_field=json.dumps("any_string"),
)
class BookGenre(str, Enum):
    """String enum of book genres; each member's value equals its name."""

    scifi = "scifi"
    action = "action"
    thriller = "thriller"
@dataclass
class BookInfo:
    """Plain dataclass (not a pydantic model) with defaulted, optional string fields."""

    isbn: Optional[str] = field(default="ISBN")
    author: Optional[str] = field(default="Charles Bukowski")
class UserLabel(BaseModel):
    """Single-field model nested inside ``UserAddress`` and ``User``."""

    label: str
class UserAddress(BaseModel):
    """Address model whose fields wrap ``UserLabel`` in abstract container annotations."""

    street: str
    zip_code: Sequence[int]
    label: Optional[UserLabel]
    # read-only (`Mapping`/`Sequence`) and mutable (`Mutable*`) container annotations
    ro_labels: Mapping[str, UserLabel]
    wr_labels: MutableMapping[str, List[UserLabel]]
    ro_list: Sequence[UserLabel]
    wr_list: MutableSequence[Dict[str, UserLabel]]
class User(BaseModel):
    """Model combining ``Annotated``, ``Union``, ``Optional`` and ``Final`` annotations.

    Nested types are skipped when columns are generated (see ``dlt_config``).
    """

    user_id: int
    account_id: UUID4
    optional_uuid: Optional[UUID4]
    name: Annotated[str, "PII", "name"]
    favorite_book: Annotated[Union[Annotated[BookInfo, "meta"], BookGenre, None], "union metadata"]
    created_at: Optional[datetime]
    labels: List[str]
    user_label: UserLabel
    user_labels: List[UserLabel]
    address: Annotated[UserAddress, "PII", "address"]
    uuid_or_str: Union[str, UUID4, None]
    unity: Union[UserAddress, UserLabel, Dict[str, UserAddress]]
    # NOTE: added "int" because this type was clashing with a type
    # in a delta-rs library that got cached and that re-orders the union
    location: Annotated[Optional[Union[str, List[str], int]], None]
    # `type(None)` / `None` below are Annotated metadata, not union members
    something_required: Annotated[Union[str, int], type(None)]
    final_location: Final[Annotated[Union[str, int], None]]  # type: ignore[misc]
    final_optional: Final[Annotated[Optional[str], None]]  # type: ignore[misc]
    dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}
# Keyword arguments used to instantiate the user models in the tests below;
# nested models are supplied as plain dicts.
USER_INSTANCE_DATA = {
    "user_id": 1,
    "account_id": uuid.uuid4(),
    "optional_uuid": None,
    "favorite_book": BookInfo(isbn="isbn-xyz", author="author"),
    "name": "random name",
    "created_at": datetime.now(),
    "labels": ["str"],
    "user_label": {"label": "123"},
    "user_labels": [{"label": "123"}],
    "address": {
        "street": "random street",
        "zip_code": [1234566, 4567789],
        "label": {"label": "123"},
        "ro_labels": {"x": {"label": "123"}},
        "wr_labels": {"y": [{"label": "123"}]},
        "ro_list": [{"label": "123"}],
        "wr_list": [{"x": {"label": "123"}}],
    },
    "unity": {"label": "123"},
    "uuid_or_str": uuid.uuid4(),
    "location": "Florida keys",
    "final_location": "Ginnie Springs",
    "something_required": 123,
    "final_optional": None,
}
@pytest.mark.parametrize("instance", [True, False])
def test_pydantic_model_to_columns(instance: bool) -> None:
    """Check the column data types generated from ``Model``, given as class or as instance."""
    source: Any = TEST_MODEL_INSTANCE if instance else Model
    columns = pydantic_to_table_schema_columns(source)

    expected_data_types = {
        "bigint_field": "bigint",
        "text_field": "text",
        "timestamp_field": "timestamp",
        "date_field": "date",
        "decimal_field": "decimal",
        "double_field": "double",
        "time_field": "time",
        "nested_field": "json",
        "list_field": "json",
        "union_field": "bigint",
        "optional_field": "double",
        "blank_dict_field": "json",
        "parametrized_dict_field": "json",
        "str_enum_field": "text",
        "int_enum_field": "bigint",
        "mixed_enum_int_field": "text",
        "mixed_enum_str_field": "text",
        "json_field": "json",
        "url_field": "text",
    }
    for column_name, data_type in expected_data_types.items():
        assert columns[column_name]["data_type"] == data_type

    assert columns["optional_field"]["nullable"] is True

    # Any type fields are excluded from schema
    for excluded_name in ("any_field", "json_any_field"):
        assert excluded_name not in columns
def test_pydantic_model_to_columns_annotated() -> None:
    """The ``User`` class and a ``User`` instance must yield the same column schema."""
    columns_from_class = pydantic_to_table_schema_columns(User)
    columns_from_instance = pydantic_to_table_schema_columns(User(**USER_INSTANCE_DATA))  # type: ignore
    assert columns_from_class == columns_from_instance

    # nullability derived from the Optional / Annotated / Final wrappers
    expected_nullable = {
        "location": True,
        "final_location": False,
        "something_required": False,
        "final_optional": True,
    }
    for column_name, nullable in expected_nullable.items():
        assert columns_from_class[column_name]["nullable"] is nullable
def test_pydantic_model_skip_nested_types() -> None:
    """With ``skip_nested_types`` enabled, model/list/dict/Json fields produce no columns."""

    class SkipNestedModel(Model):
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    result = pydantic_to_table_schema_columns(SkipNestedModel)

    # nested and JSON-like fields are left out of the schema
    assert "nested_field" not in result
    assert "list_field" not in result
    assert "blank_dict_field" not in result
    assert "parametrized_dict_field" not in result
    assert "json_field" not in result

    # simple fields are still mapped (the original asserted `bigint_field` twice)
    assert result["bigint_field"]["data_type"] == "bigint"
    assert result["text_field"]["data_type"] == "text"
    assert result["timestamp_field"]["data_type"] == "timestamp"
def test_model_for_column_mode() -> None:
    """Check how each column contract mode treats a property the model does not declare.

    Every mode is applied both to ``ModelWithConfig`` (which brings its own pydantic config)
    and to plain ``Model``, then the same payload with one extra property is parsed.
    """
    # extra prop
    instance_extra = TEST_MODEL_INSTANCE.dict()
    instance_extra["extra_prop"] = "EXTRA"
    # back to string: the Json fields must be JSON-encoded again to pass validation
    instance_extra["json_field"] = json.dumps(["a", "b", "c"])
    instance_extra["json_any_field"] = json.dumps("any_string")

    # evolve - allow extra fields
    # NOTE: whether `frozen` from ModelWithConfig survives is not asserted in this test
    model_evolve = apply_schema_contract_to_model(ModelWithConfig, "evolve")
    extra_instance = model_evolve.parse_obj(instance_extra)
    assert hasattr(extra_instance, "extra_prop")
    assert extra_instance.extra_prop == "EXTRA"
    model_evolve = apply_schema_contract_to_model(Model, "evolve")  # type: ignore[arg-type]
    extra_instance = model_evolve.parse_obj(instance_extra)
    assert extra_instance.extra_prop == "EXTRA"  # type: ignore[attr-defined]

    # freeze - validation error on extra fields
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "freeze")
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)
    model_freeze = apply_schema_contract_to_model(Model, "freeze")  # type: ignore[arg-type]
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)

    # discard row - same as freeze
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "discard_row")
    with pytest.raises(ValidationError) as py_ex:
        model_freeze.parse_obj(instance_extra)
    assert py_ex.value.errors()[0]["loc"] == ("extra_prop",)

    # discard value - ignore extra fields
    model_discard = apply_schema_contract_to_model(ModelWithConfig, "discard_value")
    extra_instance = model_discard.parse_obj(instance_extra)
    assert not hasattr(extra_instance, "extra_prop")
    # fixed: the plain `Model` variant was built with "evolve" into an unused variable while
    # the previous `model_discard` was parsed a second time
    model_discard = apply_schema_contract_to_model(Model, "discard_value")  # type: ignore[arg-type]
    extra_instance = model_discard.parse_obj(instance_extra)
    assert not hasattr(extra_instance, "extra_prop")

    # evolve data but freeze new columns
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "freeze")
    instance_extra_2 = copy(instance_extra)
    # should parse ok
    # fixed: these two checks used `model_discard`, leaving the model built above untested
    model_freeze.parse_obj(instance_extra_2)
    # this must fail validation
    instance_extra_2["bigint_field"] = "NOT INT"
    with pytest.raises(ValidationError):
        model_freeze.parse_obj(instance_extra_2)

    # let the datatypes evolve: the value that failed above is now accepted as is
    # (asserted instead of printed, so the check can actually fail)
    model_freeze = apply_schema_contract_to_model(ModelWithConfig, "evolve", "evolve")
    assert model_freeze.parse_obj(instance_extra_2).dict()["bigint_field"] == "NOT INT"

    # discarding single values is not supported for data types
    with pytest.raises(NotImplementedError):
        apply_schema_contract_to_model(ModelWithConfig, "evolve", "discard_value")
def test_nested_model_config_propagation() -> None:
    """The contract applied to ``User`` is propagated to the nested ``UserAddress`` model."""
    # TODO: finish writing this test
    model_freeze = apply_schema_contract_to_model(User, "evolve", "freeze")

    # extra is modified: the nested model is replaced by a renamed variant
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
    assert type_origin is Annotated
    # UserAddress is converted to UserAddressExtraAllow only once: the `address` field and
    # the first member of the `unity` union share the very same class object
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]
@pytest.mark.skipif(sys.version_info < (3, 10), reason="Runs only on Python 3.10 and later")
def test_nested_model_config_propagation_optional_with_pipe():
    """We would like to test that using Optional and new | syntax works as expected
    when generating a schema thus two versions of user model are defined and both instantiated
    then we generate schema for both and check if results are the same.
    """

    class UserLabelPipe(BaseModel):
        label: str

    class UserAddressPipe(BaseModel):
        street: str
        zip_code: Sequence[int]
        label: UserLabelPipe | None  # type: ignore[misc, syntax, unused-ignore]
        ro_labels: Mapping[str, UserLabelPipe]
        wr_labels: MutableMapping[str, List[UserLabelPipe]]
        ro_list: Sequence[UserLabelPipe]
        wr_list: MutableSequence[Dict[str, UserLabelPipe]]

    class UserPipe(BaseModel):
        user_id: int
        name: Annotated[str, "PII", "name"]
        created_at: datetime | None  # type: ignore[misc, syntax, unused-ignore]
        labels: List[str]
        user_label: UserLabelPipe
        user_labels: List[UserLabelPipe]
        address: Annotated[UserAddressPipe, "PII", "address"]
        unity: Union[UserAddressPipe, UserLabelPipe, Dict[str, UserAddressPipe]]
        location: Annotated[Union[str, List[str]] | None, None]  # type: ignore[misc, syntax, unused-ignore]
        something_required: Annotated[Union[str, int], type(None)]
        final_location: Final[Annotated[Union[str, int], None]]  # type: ignore[misc, syntax, unused-ignore]
        final_optional: Final[Annotated[str | None, None]]  # type: ignore[misc, syntax, unused-ignore]
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    # TODO: move to separate test
    model_freeze = apply_schema_contract_to_model(UserPipe, "evolve", "freeze")

    # extra is modified: the nested model is replaced by a renamed variant
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressPipeExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
    assert type_origin is Annotated
    # UserAddressPipe is converted only once: the `address` field and the first member of
    # the `unity` union share the very same class object
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]

    # The schema generated from the class must equal the one generated from an instance.
    user = UserPipe(**USER_INSTANCE_DATA)  # type: ignore
    schema_from_user_class = pydantic_to_table_schema_columns(UserPipe)
    schema_from_user_instance = pydantic_to_table_schema_columns(user)
    assert schema_from_user_class == schema_from_user_instance
    assert schema_from_user_class["location"]["nullable"] is True
    assert schema_from_user_class["final_location"]["nullable"] is False
    assert schema_from_user_class["something_required"]["nullable"] is False
    assert schema_from_user_class["final_optional"]["nullable"] is True
def test_item_list_validation() -> None:
    """Validate lists of rows against list models built for several contract mode pairs."""

    class ItemModel(BaseModel):
        b: bool
        opt: Optional[int] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    # --- discard_row / discard_row: rows failing validation are removed from the result
    # (both extra and declared columns)
    row_dropping_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")
    row_dropping_list_model = create_list_model(row_dropping_model)

    # violate data type
    # {"b": 2, "opt": "not int", "extra": 1.2} - note that this will generate 3 errors for the
    # same item and is crucial in our tests when discarding rows
    kept = validate_and_filter_items(
        "items",
        row_dropping_list_model,
        [{"b": True}, {"b": 2, "opt": "not int", "extra": 1.2}, {"b": 3}, {"b": False}],
        "discard_row",
        "discard_row",
    )
    assert len(kept) == 2
    assert kept[0].b is True
    assert kept[1].b is False

    # violate extra field
    kept = validate_and_filter_items(
        "items",
        row_dropping_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "discard_row",
        "discard_row",
    )
    assert len(kept) == 1
    assert kept[0].b is True

    # --- freeze / freeze: the first failing row raises (both extra and declared columns)
    strict_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze")
    strict_list_model = create_list_model(strict_model)

    # violate data type
    with pytest.raises(DataValidationError) as data_type_ex:
        validate_and_filter_items(
            "items",
            strict_list_model,
            [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
            "freeze",
            "freeze",
        )
    data_type_error = data_type_ex.value
    assert data_type_error.schema_name is None
    assert data_type_error.table_name == "items"
    assert data_type_error.column_name == str(("items", 1, "b"))  # pydantic location
    assert data_type_error.schema_entity == "data_type"
    assert data_type_error.contract_mode == "freeze"
    assert data_type_error.table_schema is strict_list_model
    assert data_type_error.data_item == {"b": 2}

    # extra type
    with pytest.raises(DataValidationError) as extra_column_ex:
        validate_and_filter_items(
            "items",
            strict_list_model,
            [{"b": True}, {"a": 2, "b": False}, {"b": 3}, {"b": False}],
            "freeze",
            "freeze",
        )
    extra_column_error = extra_column_ex.value
    assert extra_column_error.schema_name is None
    assert extra_column_error.table_name == "items"
    assert extra_column_error.column_name == str(("items", 1, "a"))  # pydantic location
    assert extra_column_error.schema_entity == "columns"
    assert extra_column_error.contract_mode == "freeze"
    assert extra_column_error.table_schema is strict_list_model
    assert extra_column_error.data_item == {"a": 2, "b": False}

    # --- discard_value / freeze: undeclared columns are stripped, rows are kept
    value_dropping_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze")
    value_dropping_list_model = create_list_model(value_dropping_model)

    # violate extra field
    kept = validate_and_filter_items(
        "items",
        value_dropping_list_model,
        [{"b": True}, {"b": False, "a": False}],
        "discard_value",
        "freeze",
    )
    assert len(kept) == 2
    # "a" extra got removed
    assert kept[1].dict() == {"b": False, "opt": None}

    # discard_value is not implemented as a data (type) mode
    with pytest.raises(NotImplementedError):
        apply_schema_contract_to_model(ItemModel, "discard_value", "discard_value")

    # --- evolve / evolve: data types and extras are both accepted
    lenient_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve")
    lenient_list_model = create_list_model(lenient_model)

    # for data types a lenient model will be created that accepts any type
    kept = validate_and_filter_items(
        "items",
        lenient_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
        "evolve",
        "evolve",
    )
    assert len(kept) == 4
    assert kept[0].b is True
    assert kept[1].b == 2

    # extra fields allowed
    kept = validate_and_filter_items(
        "items",
        lenient_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "evolve",
        "evolve",
    )
    assert len(kept) == 4
    assert kept[3].b is False
    assert kept[3].a is False  # type: ignore[attr-defined]

    # --- discard_row / evolve: accept new types but discard new columns
    mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve")
    mixed_list_model = create_list_model(mixed_model)

    # for data types a lenient model will be created that accepts any type
    kept = validate_and_filter_items(
        "items",
        mixed_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False}],
        "discard_row",
        "evolve",
    )
    assert len(kept) == 4
    assert kept[0].b is True
    assert kept[1].b == 2

    # extra fields forbidden - full rows discarded
    kept = validate_and_filter_items(
        "items",
        mixed_list_model,
        [{"b": True}, {"b": 2}, {"b": 3}, {"b": False, "a": False}],
        "discard_row",
        "evolve",
    )
    assert len(kept) == 3
def test_item_validation() -> None:
    """Validate single rows against models built for several contract mode pairs."""

    class ItemModel(BaseModel):
        b: bool
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    # --- discard_row / discard_row: a failing row yields None (both extra and declared)
    row_dropping_model = apply_schema_contract_to_model(ItemModel, "discard_row", "discard_row")

    # violate data type
    dropped = validate_and_filter_item(
        "items", row_dropping_model, {"b": 2}, "discard_row", "discard_row"
    )
    assert dropped is None

    # violate extra field
    dropped = validate_and_filter_item(
        "items", row_dropping_model, {"b": False, "a": False}, "discard_row", "discard_row"
    )
    assert dropped is None

    # --- freeze / freeze: a failing row raises (both extra and declared)
    strict_model = apply_schema_contract_to_model(ItemModel, "freeze", "freeze")

    # violate data type
    with pytest.raises(DataValidationError) as data_type_ex:
        validate_and_filter_item("items", strict_model, {"b": 2}, "freeze", "freeze")
    data_type_error = data_type_ex.value
    assert data_type_error.schema_name is None
    assert data_type_error.table_name == "items"
    assert data_type_error.column_name == str(("b",))  # pydantic location
    assert data_type_error.schema_entity == "data_type"
    assert data_type_error.contract_mode == "freeze"
    assert data_type_error.table_schema is strict_model
    assert data_type_error.data_item == {"b": 2}

    # extra type
    with pytest.raises(DataValidationError) as extra_column_ex:
        validate_and_filter_item("items", strict_model, {"a": 2, "b": False}, "freeze", "freeze")
    extra_column_error = extra_column_ex.value
    assert extra_column_error.schema_name is None
    assert extra_column_error.table_name == "items"
    assert extra_column_error.column_name == str(("a",))  # pydantic location
    assert extra_column_error.schema_entity == "columns"
    assert extra_column_error.contract_mode == "freeze"
    assert extra_column_error.table_schema is strict_model
    assert extra_column_error.data_item == {"a": 2, "b": False}

    # --- discard_value / freeze: undeclared columns are stripped from the row
    value_dropping_model = apply_schema_contract_to_model(ItemModel, "discard_value", "freeze")

    # violate extra field
    validated = validate_and_filter_item(
        "items", value_dropping_model, {"b": False, "a": False}, "discard_value", "freeze"
    )
    # "a" extra got removed
    assert validated.dict() == {"b": False}

    # --- evolve / evolve: data types and extras are both accepted
    lenient_model = apply_schema_contract_to_model(ItemModel, "evolve", "evolve")

    # for data types a lenient model will be created that accepts any type
    validated = validate_and_filter_item("items", lenient_model, {"b": 2}, "evolve", "evolve")
    assert validated.b == 2

    # extra fields allowed
    validated = validate_and_filter_item(
        "items", lenient_model, {"b": False, "a": False}, "evolve", "evolve"
    )
    assert validated.b is False
    assert validated.a is False  # type: ignore[attr-defined]

    # --- discard_row / evolve: accept new types but discard new columns
    mixed_model = apply_schema_contract_to_model(ItemModel, "discard_row", "evolve")

    # for data types a lenient model will be created that accepts any type
    validated = validate_and_filter_item("items", mixed_model, {"b": 3}, "discard_row", "evolve")
    assert validated.b == 3

    # extra fields forbidden - full rows discarded
    dropped = validate_and_filter_item(
        "items", mixed_model, {"b": False, "a": False}, "discard_row", "evolve"
    )
    assert dropped is None
class ChildModel(BaseModel):
    """Nested model with one required and one optional string attribute."""

    child_attribute: str
    optional_child_attribute: Optional[str] = None
class Parent(BaseModel):
    """Model holding a required ``ChildModel`` plus one optional string attribute."""

    child: ChildModel
    optional_parent_attribute: Optional[str] = None
@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_pydantic_model_flattened_when_skip_nested_types_is_true(config_attr: str):
    """With the skip flag set, the child model's fields become ``child__*`` text columns."""

    class MyParent(Parent):
        dlt_config: ClassVar[DltConfig] = {config_attr: True}  # type: ignore

    schema = pydantic_to_table_schema_columns(MyParent)

    # every expected column is text; only the name and nullability differ
    expected_schema = {
        column_name: {"data_type": "text", "name": column_name, "nullable": nullable}
        for column_name, nullable in (
            ("child__child_attribute", False),
            ("child__optional_child_attribute", True),
            ("optional_parent_attribute", True),
        )
    }
    assert schema == expected_schema
@pytest.mark.parametrize("config_attr", ("skip_nested_types", "skip_complex_types"))
def test_considers_model_as_complex_when_skip_nested_types_is_false(config_attr: str):
    """With the skip flag off, the child model and the dict field each map to a json column."""

    class MyParent(Parent):
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {config_attr: False}  # type: ignore

    schema = pydantic_to_table_schema_columns(MyParent)

    expected_schema = {
        "child": {"data_type": "json", "name": "child", "nullable": False},
        "data_dictionary": {"data_type": "json", "name": "data_dictionary", "nullable": False},
        "optional_parent_attribute": {
            "data_type": "text",
            "name": "optional_parent_attribute",
            "nullable": True,
        },
    }
    assert schema == expected_schema
def test_considers_dictionary_as_complex_when_skip_nested_types_is_false():
    """With ``skip_nested_types`` off, dict and list fields map to non-nullable json columns."""

    class MyParent(Parent):
        data_list: List[str] = []
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": False}

    columns = pydantic_to_table_schema_columns(MyParent)

    for column_name in ("data_dictionary", "data_list"):
        assert columns[column_name] == {
            "data_type": "json",
            "name": column_name,
            "nullable": False,
        }
def test_skip_json_types_when_skip_nested_types_is_true_and_field_is_not_pydantic_model():
    """With ``skip_nested_types`` on, plain dict and list fields produce no columns."""

    class MyParent(Parent):
        data_list: List[str] = []
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {"skip_nested_types": True}

    columns = pydantic_to_table_schema_columns(MyParent)

    for skipped_name in ("data_dictionary", "data_list"):
        assert skipped_name not in columns
def test_typed_dict_by_python_version():
    """A pydantic model can declare a field typed with a dlt ``TypedDict``.

    When using TypedDict in pydantic, it should be imported from typing_extensions
    in python 3.11 and earlier and from typing in python 3.12 and later.
    Here we test that this is properly set up in dlt.
    """

    class MyModel(BaseModel):
        # TColumnType inherits from TypedDict
        column_type: TColumnType

    valid_model = MyModel(column_type={"data_type": "text"})
    assert valid_model.column_type == {"data_type": "text"}

    # a value outside the allowed data types must be rejected
    with pytest.raises(ValidationError):
        MyModel(column_type={"data_type": "invalid_type"})  # type: ignore[typeddict-item]