mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
feat/3103: Ensure consistency in HexBytes coercion (#3200)
* Refactor: Replace hexbytes dependency with custom HexBytes implementation
* Removed the hexbytes library and integrated a custom HexBytes class to ensure compatibility with the codebase.
* Updated imports across multiple files to use the new HexBytes class.
* Added tests for the HexBytes class to validate its functionality and ensure proper behavior with various input types.
* Update hexbytes error handling test to reject lists as input type
* Remove TypeError test for unsupported list input in HexBytes error handling
* Refactor: Improve formatting of hex method in HexBytes class for better readability
* Refactor: Clean up comments and improve readability in hex method of HexBytes class
* Refactor: Rename methods in HexBytes class for clarity and consistency
* Updated method names from `to_bytes` to `_to_bytes` and `hexstr_to_bytes` to `_hexstr_to_bytes` to indicate their private nature.
* Adjusted method calls within the class to reflect the new names, enhancing code readability and maintainability.
* Removed support for bool and int types in HexBytes constructor, streamlining input handling, and introduced a new fromhex method to create HexBytes from hex strings, improving clarity.
* Remove hexbytes dependency from lockfile and related configurations
* Enhance hex method in HexBytes class to support custom separators and bytes per separator. This improves flexibility in hex encoding output while maintaining the existing functionality.
* Refactor hex method in HexBytes class to improve parameter handling and readability. Updated the method signature to clarify the use of custom separators and bytes per separator, ensuring consistent behavior with existing functionality.
* Update hex method in HexBytes class to remove unnecessary noqa comments, enhancing code clarity and consistency.
This commit is contained in:
@@ -4,7 +4,6 @@ import dataclasses
|
||||
from datetime import date, datetime, time # noqa: I251
|
||||
from typing import Any, Callable, List, Protocol, IO, Union, Dict
|
||||
from uuid import UUID
|
||||
from hexbytes import HexBytes
|
||||
from enum import Enum
|
||||
|
||||
try:
|
||||
@@ -18,7 +17,7 @@ from dlt.common.pendulum import pendulum
|
||||
from dlt.common.arithmetics import Decimal
|
||||
from dlt.common.wei import Wei
|
||||
from dlt.common.utils import map_nested_values_in_place
|
||||
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
|
||||
TPuaDecoders = List[Callable[[Any], Any]]
|
||||
|
||||
|
||||
88
dlt/common/libs/hexbytes.py
Normal file
88
dlt/common/libs/hexbytes.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Union,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import (
|
||||
SupportsIndex,
|
||||
)
|
||||
|
||||
# Input types accepted by the HexBytes constructor. A `str` is always
# interpreted as a hex string (with or without a 0x/0X prefix).
BytesLike = Union[bytearray, bytes, str, memoryview]

HEX_PREFIX_LOWER = "0x"
HEX_PREFIX_UPPER = "0X"


class HexBytes(bytes):
    """
    A `bytes` subclass whose hex representation is always 0x-prefixed.

    HexBytes is a custom class that replaces the hexbytes library to ensure
    compatibility with the rest of the codebase. Compared to plain `bytes`:
        1. `hex()` always prepends the 0x prefix to the hex string.
        2. The representation at console (`__repr__`) is 0x-prefixed.
        3. The constructor accepts hex strings, with or without a 0x/0X prefix.
        4. Slicing returns `HexBytes`; indexing with an integer returns `int`.
    """

    def __new__(cls, val: BytesLike) -> "HexBytes":
        """
        Create a HexBytes from bytes, bytearray, memoryview or a hex string.

        Raises:
            TypeError: if `val` is an int or bool.
            ValueError: if `val` is a str that is not valid hex.
        """
        bytesval = cls._to_bytes(val)
        return cast(HexBytes, super().__new__(cls, bytesval))  # type: ignore # https://github.com/python/typeshed/issues/2630 # noqa: E501

    def hex(  # noqa: A003
        self, sep: Union[str, bytes, None] = None, bytes_per_sep: "SupportsIndex" = 1
    ) -> str:
        """
        Output hex-encoded bytes, with an "0x" prefix.

        Everything following the "0x" is output exactly like :meth:`bytes.hex`.

        Args:
            sep: optional separator inserted between groups of bytes
            bytes_per_sep: number of bytes per group when `sep` is given

        Returns:
            0x-prefixed hex string ("0x" alone for empty bytes)
        """
        # bytes.hex() does not accept sep=None, so only forward the separator
        # arguments when a separator was actually requested
        encoded = super().hex() if sep is None else super().hex(sep, bytes_per_sep)
        return HEX_PREFIX_LOWER + encoded

    @overload
    def __getitem__(self, key: "SupportsIndex") -> int:  # noqa: F811
        ...

    @overload  # noqa: F811
    def __getitem__(self, key: slice) -> "HexBytes":  # noqa: F811
        ...

    def __getitem__(  # noqa: F811
        self, key: Union["SupportsIndex", slice]
    ) -> Union[int, "HexBytes"]:
        """Return an int for an index key and a HexBytes for a slice key."""
        result = super().__getitem__(key)
        # Dispatch on `slice`, not on `int`: an index may be any object that
        # implements __index__, and all of those yield a single int.
        if isinstance(key, slice):
            return self.__class__(cast(bytes, result))
        return cast(int, result)

    def __repr__(self) -> str:
        return f"HexBytes({self.hex()!r})"

    @staticmethod
    def _to_bytes(val: BytesLike) -> bytes:
        """
        Convert BytesLike input to bytes representation.

        Args:
            val: bytes, str (hex), bytearray, or memoryview

        Returns:
            bytes representation of the input

        Raises:
            TypeError: if `val` is an int or bool
            ValueError: if `val` is a str that is not valid hex
        """
        if isinstance(val, bytes):
            return val
        if isinstance(val, str):
            return HexBytes.fromhex(val)
        if isinstance(val, int):
            # bytes(n) would silently build n zero bytes, which is never the
            # intended conversion of an integer (bool is an int subclass too)
            raise TypeError(
                f"Cannot convert {type(val).__name__} to HexBytes, expected bytes, bytearray,"
                " memoryview or hex str"
            )
        return bytes(val)

    @classmethod
    def fromhex(cls, hexstr: str) -> "HexBytes":
        """
        Create HexBytes from hex string, handling optional 0x prefix.

        Args:
            hexstr: Hex string with or without 0x/0X prefix

        Returns:
            HexBytes instance

        Raises:
            ValueError: if the string (after removing one prefix) is not valid hex
        """
        # strip at most one prefix so malformed input like "0x0Xab" is rejected
        if hexstr.startswith((HEX_PREFIX_LOWER, HEX_PREFIX_UPPER)):
            hexstr = hexstr[len(HEX_PREFIX_LOWER) :]
        return super(HexBytes, cls).__new__(cls, bytes.fromhex(hexstr))
|
||||
@@ -1,12 +1,12 @@
|
||||
import datetime # noqa: 251
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from hexbytes import HexBytes
|
||||
|
||||
from dlt.common.pendulum import pendulum
|
||||
from dlt.common.wei import Wei
|
||||
from dlt.common.data_types import TDataType
|
||||
from dlt.common.time import parse_iso_like_datetime
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
|
||||
|
||||
_NOW_TS: float = pendulum.now().timestamp()
|
||||
|
||||
@@ -35,7 +35,6 @@ dependencies = [
|
||||
"simplejson>=3.17.5",
|
||||
"PyYAML>=5.4.1",
|
||||
"semver>=3.0.0",
|
||||
"hexbytes>=0.2.2",
|
||||
"tzdata>=2022.1",
|
||||
"tomlkit>=0.11.3",
|
||||
"pathvalidate>=2.5.2",
|
||||
|
||||
@@ -2,7 +2,6 @@ import datetime # noqa: I251
|
||||
import hashlib
|
||||
from typing import Dict, List, Any, Sequence, Tuple, Literal, Union
|
||||
import base64
|
||||
from hexbytes import HexBytes
|
||||
from copy import deepcopy
|
||||
from string import ascii_lowercase
|
||||
import random
|
||||
@@ -22,6 +21,7 @@ from dlt.common.time import (
|
||||
ensure_pendulum_time,
|
||||
ensure_pendulum_date,
|
||||
)
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
from dlt.common.schema import TColumnSchema, TTableSchemaColumns
|
||||
|
||||
from tests.utils import TPythonTableFormat, TestDataItemFormat, arrow_item_from_pandas
|
||||
|
||||
74
tests/common/libs/test_hexbytes.py
Normal file
74
tests/common/libs/test_hexbytes.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import pytest
|
||||
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
|
||||
|
||||
def test_hexbytes_from_bytes():
    """HexBytes built from raw bytes is a bytes value that hex-encodes with a 0x prefix."""
    wrapped = HexBytes(b"binary string")

    # the instance must be usable both as HexBytes and as plain bytes
    for expected_type in (HexBytes, bytes):
        assert isinstance(wrapped, expected_type)

    assert wrapped == b"binary string"
    assert wrapped.hex() == "0x62696e61727920737472696e67"
|
||||
|
||||
|
||||
def test_hexbytes_from_hex_string():
    """A hex string yields the same HexBytes whether or not it carries the 0x prefix."""
    prefixed = HexBytes("0x62696e61727920737472696e67")
    bare = HexBytes("62696e61727920737472696e67")

    # both spellings decode to the same payload
    assert prefixed == bare
    assert prefixed == b"binary string"
    # output is always prefixed, regardless of how the value was created
    assert prefixed.hex() == "0x62696e61727920737472696e67"
|
||||
|
||||
|
||||
def test_hexbytes_indexing():
    """Integer indexing returns int values while slicing returns HexBytes."""
    sample = HexBytes(b"binary")

    # single item access yields the byte value as an int
    head = sample[0]
    tail = sample[-1]
    assert head == ord("b")
    assert tail == ord("y")

    # slices keep the HexBytes type
    inner = sample[1:3]
    assert isinstance(inner, HexBytes)
    assert inner == b"in"

    # open-ended slices at both ends
    assert sample[:2] == b"bi"
    assert sample[-2:] == b"ry"
|
||||
|
||||
|
||||
def test_hexbytes_representation():
    """repr() shows the 0x-prefixed hex string and bytes() recovers the raw payload."""
    sample = HexBytes(b"test")

    rendered = repr(sample)
    assert rendered == "HexBytes('0x74657374')"

    # converting back to plain bytes must not alter the content
    plain = bytes(sample)
    assert plain == b"test"
|
||||
|
||||
|
||||
def test_hexbytes_comparison():
    """HexBytes compares by content, against other HexBytes and against plain bytes."""
    left = HexBytes(b"test")
    twin = HexBytes(b"test")
    other = HexBytes(b"different")

    # HexBytes vs HexBytes
    assert left == twin
    assert left != other

    # HexBytes vs plain bytes
    assert left == b"test"
    assert left != b"different"
|
||||
|
||||
|
||||
def test_hexbytes_hex_method():
    """hex() always emits the 0x prefix, including for an empty value."""
    # one byte -> prefix followed by two hex digits
    one_byte = HexBytes(b"A")
    assert one_byte.hex() == "0x41"

    # no bytes -> the prefix alone
    nothing = HexBytes(b"")
    assert nothing.hex() == "0x"
|
||||
@@ -3,7 +3,7 @@ from copy import copy
|
||||
from typing import Any, Type
|
||||
import pytest
|
||||
import datetime # noqa: I251
|
||||
from hexbytes import HexBytes
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
from enum import Enum
|
||||
|
||||
from pendulum.tz import UTC
|
||||
@@ -32,10 +32,9 @@ def test_coerce_type_to_text() -> None:
|
||||
# bytes to text (base64)
|
||||
assert coerce_value("text", "binary", b"binary string") == "YmluYXJ5IHN0cmluZw=="
|
||||
# HexBytes to text (hex with prefix)
|
||||
assert coerce_value("text", "binary", HexBytes(b"binary string")) in [
|
||||
"0x62696e61727920737472696e67",
|
||||
"62696e61727920737472696e67",
|
||||
]
|
||||
assert (
|
||||
coerce_value("text", "binary", HexBytes(b"binary string")) == "0x62696e61727920737472696e67"
|
||||
)
|
||||
|
||||
# Str enum value
|
||||
class StrEnum(Enum):
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from hexbytes import HexBytes
|
||||
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
from dlt.common import pendulum, Decimal, Wei
|
||||
from dlt.common.schema.utils import autodetect_sc_type
|
||||
from dlt.common.schema.detections import (
|
||||
|
||||
@@ -3,8 +3,7 @@ from pendulum import UTC
|
||||
import pytest
|
||||
from copy import deepcopy
|
||||
from typing import Any, Iterator, List, Sequence
|
||||
from hexbytes import HexBytes
|
||||
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
from dlt.common import Wei, Decimal, pendulum, json
|
||||
from dlt.common.configuration.container import Container
|
||||
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
|
||||
|
||||
@@ -5,12 +5,12 @@ import os
|
||||
import contextlib
|
||||
from subprocess import CalledProcessError
|
||||
from typing import List, Tuple, Optional
|
||||
from hexbytes import HexBytes
|
||||
import pytest
|
||||
from unittest import mock
|
||||
import re
|
||||
from packaging.requirements import Requirement
|
||||
from typing import Dict
|
||||
from dlt.common.libs.hexbytes import HexBytes
|
||||
|
||||
# import that because O3 modules cannot be unloaded
|
||||
import cryptography.hazmat.bindings._rust
|
||||
|
||||
11
uv.lock
generated
11
uv.lock
generated
@@ -2048,7 +2048,6 @@ dependencies = [
|
||||
{ name = "fsspec" },
|
||||
{ name = "gitpython" },
|
||||
{ name = "giturlparse" },
|
||||
{ name = "hexbytes" },
|
||||
{ name = "humanize" },
|
||||
{ name = "jsonpath-ng" },
|
||||
{ name = "orjson", version = "3.10.18", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' and sys_platform != 'emscripten'" },
|
||||
@@ -2365,7 +2364,6 @@ requires-dist = [
|
||||
{ name = "google-cloud-bigquery", marker = "extra == 'gcp'", specifier = ">=2.26.0" },
|
||||
{ name = "grpcio", marker = "extra == 'bigquery'", specifier = ">=1.50.0" },
|
||||
{ name = "grpcio", marker = "extra == 'gcp'", specifier = ">=1.50.0" },
|
||||
{ name = "hexbytes", specifier = ">=0.2.2" },
|
||||
{ name = "humanize", specifier = ">=4.4.0" },
|
||||
{ name = "ibis-framework", marker = "python_full_version >= '3.10' and extra == 'workspace'", specifier = ">=10.5.0" },
|
||||
{ name = "jsonpath-ng", specifier = ">=1.5.3" },
|
||||
@@ -3749,15 +3747,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957, upload-time = "2025-02-01T11:02:26.481Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hexbytes"
|
||||
version = "0.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c1/94/fbfd526e8964652eec6a7b74ae18d1426e225ab602553858531ec6567d05/hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d", size = 6188, upload-time = "2023-06-08T20:36:59.73Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0b/9e/fdfe374c28d448a58563e7e43f569f8cf8cf600db092efac2e8ac2f86782/hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59", size = 5944, upload-time = "2023-06-08T20:36:58.066Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "4.1.0"
|
||||
|
||||
Reference in New Issue
Block a user