Mirror of https://github.com/dlt-hub/dlt.git, synced 2025-12-17 19:31:30 +00:00
drop python 3.8, enable python 3.13, and enable full linting for 3.12 (#2194)
* add python 3.12 linting
* update locked versions to make the project installable on py 3.12
* update flake8
* downgrade poetry for all tests relying on python 3.8
* drop python 3.8
* enable python 3.13
* copy test updates from the python 3.13 branch
* update locked sentry version
* pin poetry to 1.8.5
* install ibis outside of poetry
* rename to workflows for consistency
* switch to the published alpha version of dlt-pendulum for python 3.13
* fix images
* add note to readme
.github/workflows/lint.yml (6 lines changed)
@@ -26,7 +26,7 @@ jobs:
       matrix:
         os:
           - ubuntu-latest
-        python-version: ["3.9.x", "3.10.x", "3.11.x"]
+        python-version: ["3.9.x", "3.10.x", "3.11.x", "3.12.x"]

     defaults:
       run:
@@ -42,9 +42,9 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Poetry
-       uses: snok/install-poetry@v1
+       uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
.github/workflows/test_common.yml (22 lines changed)
@@ -43,6 +43,10 @@ jobs:
            os: "ubuntu-latest"
          - python-version: "3.12.x"
            os: "ubuntu-latest"
+         - python-version: "3.13.x"
+           os: "ubuntu-latest"
+         - python-version: "3.13.x"
+           os: "windows-latest"

    defaults:
      run:
@@ -115,7 +119,7 @@ jobs:
        shell: cmd

      - name: Install pyarrow
-       run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk && poetry run pip install pyarrow==15.0.2
+       run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk

      - run: |
          poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow
@@ -128,7 +132,7 @@ jobs:
        shell: cmd

      - name: Install pipeline and sources dependencies
-       run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources && poetry run pip install pyarrow==15.0.2
+       run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources

      - run: |
          poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources
@@ -154,20 +158,6 @@ jobs:
        name: Run extract tests Windows
        shell: cmd

-     # here we upgrade pyarrow to 17 and run the libs tests again
-     - name: Install pyarrow 17
-       run: poetry run pip install pyarrow==17.0.0
-
-     - run: |
-         poetry run pytest tests/libs
-       if: runner.os != 'Windows'
-       name: Run libs tests Linux/MAC
-     - run: |
-         poetry run pytest tests/libs
-       if: runner.os == 'Windows'
-       name: Run libs tests Windows
-       shell: cmd
-
      # - name: Install Pydantic 1.0
      #   run: pip install "pydantic<2"

@@ -68,7 +68,7 @@ jobs:

      - name: Install dependencies
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-       run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -68,7 +68,7 @@ jobs:

      - name: Install dependencies
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-       run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -67,7 +67,7 @@ jobs:

      - name: Install dependencies
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-       run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -62,7 +62,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -65,7 +65,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -66,7 +66,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - run: |
          poetry run pytest tests/load --ignore tests/load/sources
@@ -65,7 +65,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-motherduck

      - name: Install dependencies
-       run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_mssql.yml (2 lines changed)
@@ -70,7 +70,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -65,7 +65,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -87,7 +87,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

      - name: Install dependencies
-       run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline,ibis && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"
+       run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse] && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
@@ -68,7 +68,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

      - name: Install dependencies
-       run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline,ibis
+       run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destinations.yml (2 lines changed)
@@ -78,7 +78,7 @@ jobs:
        # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

      - name: Install dependencies
-       run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+       run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake -E pyiceberg && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: Upgrade sqlalchemy
        run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`
@@ -96,7 +96,7 @@ jobs:
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

      - name: Install dependencies
-       run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+       run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake -E pyiceberg && poetry run pip install ibis-framework[duckdb,postgres,bigquery,snowflake,mssql,clickhouse]

      - name: Upgrade sqlalchemy
        run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`
.github/workflows/test_doc_snippets.yml (2 lines changed)
@@ -73,7 +73,7 @@ jobs:
          node-version: 20

      - name: Install Poetry
-       uses: snok/install-poetry@v1
+       uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
.gitignore (3 lines changed)
@@ -143,4 +143,5 @@ tmp
local_cache/

# test file for examples are generated and should not be committed
docs/examples/**/test*.py
+compiled_requirements.txt
Makefile (9 lines changed)
@@ -102,11 +102,14 @@ publish-library: build-library
	poetry publish

test-build-images: build-library
-	# TODO: enable when we can remove special duckdb setting for python 3.12
	# NOTE: poetry export does not work with our many different deps, we install a subset and freeze
	# poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
	# grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
+	poetry install --no-interaction -E gcp -E redshift -E duckdb
+	poetry run pip freeze > _gen_requirements.txt
+	# filter out libs that need native compilation
+	grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
	docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
-	# docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
+	docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .

preprocess-docs:
	# run docs preprocessing to run a few checks and ensure examples can be parsed
@@ -28,7 +28,7 @@ Be it a Google Colab notebook, AWS Lambda function, an Airflow DAG, your local l

 ## Installation

-dlt supports Python 3.8+.
+dlt supports Python 3.9+. Python 3.13 is supported but considered experimental at this time, as not all of dlt's extras support Python 3.13 yet. We additionally maintain a [forked version of pendulum](https://github.com/dlt-hub/pendulum) until pendulum publishes a release for 3.13.

 ```sh
 pip install dlt
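For context on the new support window: the package metadata now declares `python = ">=3.9,<3.14"` (see the pyproject.toml hunks below), with 3.13 treated as experimental. A minimal, hypothetical runtime guard that mirrors that statement (not part of dlt itself):

```python
import sys

# Python 3.9-3.12 are fully supported; 3.13 works but is experimental
# because some extras do not ship Python 3.13 wheels yet.
if sys.version_info < (3, 9):
    raise RuntimeError("dlt requires Python 3.9 or newer")
if sys.version_info >= (3, 13):
    print("Note: Python 3.13 support is experimental in dlt")
```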
@@ -1 +1 @@
-cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c
+pyarrow\|cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c
@@ -1,4 +1,4 @@
-FROM alpine:3.15
+FROM python:3.11.11-alpine

 # Metadata
 LABEL org.label-schema.vendor="dltHub" \
@@ -17,16 +17,28 @@ ADD compiled_requirements.txt .

 # install alpine deps
 RUN apk update &&\
-    apk add --no-cache python3 ca-certificates curl postgresql &&\
-    apk add --no-cache --virtual build-deps build-base automake autoconf libtool python3-dev postgresql-dev libffi-dev linux-headers gcc musl-dev &&\
-    ln -s /usr/bin/python3 /usr/bin/python &&\
+    apk add --no-cache ca-certificates curl postgresql git &&\
+    apk add --no-cache --virtual build-deps build-base automake autoconf libtool python3-dev postgresql-dev libffi-dev linux-headers gcc musl-dev cmake &&\
-    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py &&\
-    rm get-pip.py &&\
-    pip3 install --upgrade setuptools wheel &&\
-    rm -r /usr/lib/python*/ensurepip &&\
-    pip3 install -r compiled_requirements.txt &&\
+    pip install --upgrade setuptools wheel pip &&\
+    rm -r /usr/lib/python*/ensurepip

+# install arrow 17.0.0, usually we would need apache-arrow-dev=17.0.0 but it is not available in alpine 3.20
+# adapt this version to the arrow version you need
+RUN git clone --no-checkout https://github.com/apache/arrow.git /arrow \
+    && cd /arrow \
+    && git checkout tags/apache-arrow-17.0.0 \
+    && cd cpp \
+    && mkdir build \
+    && cd build \
+    && cmake -DARROW_CSV=ON -DARROW_JSON=ON -DARROW_FILESYSTEM=ON .. \
+    && make -j$(nproc) \
+    && make install
+
+RUN pip install -r compiled_requirements.txt &&\
     apk del --purge build-deps
     #rm -r /root/.cache


 # add build labels and envs
 ARG COMMIT_SHA=""
@@ -38,7 +50,7 @@ ENV IMAGE_VERSION=${IMAGE_VERSION}

 # install exactly the same version of the library we used to build
 COPY dist/dlt-${IMAGE_VERSION}.tar.gz .
-RUN pip3 install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]
+RUN pip install /tmp/pydlt/dlt-${IMAGE_VERSION}.tar.gz[gcp,redshift,duckdb]

 WORKDIR /
 RUN rm -r /tmp/pydlt
@@ -1,4 +1,4 @@
-FROM apache/airflow:2.5.2-python3.8
+FROM apache/airflow:2.5.2-python3.9

 # Metadata
 LABEL org.label-schema.vendor="dltHub" \
@@ -2,6 +2,8 @@ import re
 import abc
 import os
 import yaml
+import ast

 from yaml import Dumper
 from itertools import chain
 from typing import List, Optional, Sequence, Tuple, Any, Dict
@@ -22,7 +24,7 @@ from dlt.common.configuration.providers import (
 from dlt.common.git import get_origin, get_repo, Repo
 from dlt.common.configuration.specs.runtime_configuration import get_default_pipeline_name
 from dlt.common.typing import StrAny
-from dlt.common.reflection.utils import evaluate_node_literal, ast_unparse
+from dlt.common.reflection.utils import evaluate_node_literal
 from dlt.common.pipeline import LoadInfo, TPipelineState, get_dlt_repos_dir
 from dlt.common.storages import FileStorage
 from dlt.common.utils import set_working_dir
@@ -312,7 +314,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
            if f_r_value is None:
                fmt.warning(
                    "The value of `dev_mode` in call to `dlt.pipeline` cannot be"
-                    f" determined from {ast_unparse(f_r_node).strip()}. We assume that you know"
+                    f" determined from {ast.unparse(f_r_node).strip()}. We assume that you know"
                    " what you are doing :)"
                )
            if f_r_value is True:
@@ -330,7 +332,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
                raise CliCommandInnerException(
                    "deploy",
                    "The value of 'pipelines_dir' argument in call to `dlt_pipeline` cannot be"
-                    f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir"
+                    f" determined from {ast.unparse(p_d_node).strip()}. Pipeline working dir"
                    " will be found. Pass it directly with --pipelines-dir option.",
                )
@@ -341,7 +343,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio
                raise CliCommandInnerException(
                    "deploy",
                    "The value of 'pipeline_name' argument in call to `dlt_pipeline` cannot be"
-                    f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir"
+                    f" determined from {ast.unparse(p_d_node).strip()}. Pipeline working dir"
                    " will be found. Pass it directly with --pipeline-name option.",
                )
                pipelines.append((pipeline_name, pipelines_dir))
@@ -4,7 +4,7 @@ from typing import Dict, Tuple, Set, List

 from dlt.common.configuration import is_secret_hint
 from dlt.common.configuration.specs import BaseConfiguration
-from dlt.common.reflection.utils import creates_func_def_name_node, ast_unparse
+from dlt.common.reflection.utils import creates_func_def_name_node
 from dlt.common.typing import is_optional_type

 from dlt.sources import SourceReference
@@ -63,7 +63,7 @@ def find_source_calls_to_replace(
    for calls in visitor.known_sources_resources_calls.values():
        for call in calls:
            transformed_nodes.append(
-                (call.func, ast.Name(id=pipeline_name + "_" + ast_unparse(call.func)))
+                (call.func, ast.Name(id=pipeline_name + "_" + ast.unparse(call.func)))
            )

    return transformed_nodes
@@ -44,7 +44,6 @@ def resolve_paths(paths: TAnyJsonPath, data: DictStrAny) -> List[str]:
    >>> # ['a.items.[0].b', 'a.items.[1].b']
    """
    paths = compile_paths(paths)
-    p: JSONPath
    return list(chain.from_iterable((str(r.full_path) for r in p.find(data)) for p in paths))
@@ -248,20 +248,12 @@ def apply_schema_contract_to_model(
            return Annotated[_process_annotation(a_t), tuple(a_m)]  # type: ignore[return-value]
        elif is_list_generic_type(t_):
            l_t: Type[Any] = get_args(t_)[0]
-            try:
-                return get_origin(t_)[_process_annotation(l_t)]  # type: ignore[no-any-return]
-            except TypeError:
-                # this is Python3.8 fallback. it does not support indexers on types
-                return List[_process_annotation(l_t)]  # type: ignore
+            return get_origin(t_)[_process_annotation(l_t)]  # type: ignore[no-any-return]
        elif is_dict_generic_type(t_):
            k_t: Type[Any]
            v_t: Type[Any]
            k_t, v_t = get_args(t_)
-            try:
-                return get_origin(t_)[k_t, _process_annotation(v_t)]  # type: ignore[no-any-return]
-            except TypeError:
-                # this is Python3.8 fallback. it does not support indexers on types
-                return Dict[k_t, _process_annotation(v_t)]  # type: ignore
+            return get_origin(t_)[k_t, _process_annotation(v_t)]  # type: ignore[no-any-return]
        elif is_union_type(t_):
            u_t_s = tuple(_process_annotation(u_t) for u_t in extract_union_types(t_))
            return Union[u_t_s]  # type: ignore[return-value]
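The dropped `try/except TypeError` blocks existed because subscripting the result of `get_origin()` fails on Python 3.8; on 3.9+ builtin containers accept type parameters (PEP 585), so the direct form always works. A minimal sketch of the behavior the code now relies on (mine, not from the diff):

```python
from typing import List, get_args, get_origin

hint = List[int]
origin = get_origin(hint)   # <class 'list'>
inner = get_args(hint)[0]   # <class 'int'>

# On Python 3.9+ builtin types are subscriptable (PEP 585),
# so the generic can be rebuilt without a typing.List fallback.
rebuilt = origin[inner]     # list[int]
print(rebuilt)
```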
@@ -1,14 +1,8 @@
 import ast
 import inspect

 from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, Callable

-try:
-    import astunparse
-
-    ast_unparse: Callable[[ast.AST], str] = astunparse.unparse
-except ImportError:
-    ast_unparse = ast.unparse  # type: ignore[attr-defined, unused-ignore]
-
 from dlt.common.typing import AnyFun
@@ -31,7 +25,7 @@ def get_literal_defaults(node: Union[ast.FunctionDef, ast.AsyncFunctionDef]) ->
    literal_defaults: Dict[str, str] = {}
    for arg, default in zip(reversed(args), reversed(defaults)):
        if default:
-            literal_defaults[str(arg.arg)] = ast_unparse(default).strip()
+            literal_defaults[str(arg.arg)] = ast.unparse(default).strip()

    return literal_defaults
@@ -105,7 +99,7 @@ def rewrite_python_script(
            script_lines.append(source_script_lines[last_line][last_offset : node.col_offset])  # type: ignore[attr-defined]

        # replace node value
-        script_lines.append(ast_unparse(t_value).strip())
+        script_lines.append(ast.unparse(t_value).strip())
        last_line = node.end_lineno - 1  # type: ignore[attr-defined]
        last_offset = node.end_col_offset  # type: ignore[attr-defined]
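Context for the `ast_unparse` removals above and in the CLI hunks: `ast.unparse` has been in the standard library since Python 3.9, so the `astunparse` fallback was only ever needed for 3.8. A small illustrative sketch (mine, not from the repo):

```python
import ast

# Parse a call expression and turn AST nodes back into source text.
expr = ast.parse('dlt.pipeline(pipeline_name="demo", dev_mode=True)', mode="eval")
call = expr.body

print(ast.unparse(call.func))               # dlt.pipeline
print(ast.unparse(call.keywords[0].value))  # 'demo'
```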
@@ -26,7 +26,7 @@ class NullExecutor(Executor):
    Provides a uniform interface for `None` pool type
    """

-    def submit(self, fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> Future[T]:
+    def submit(self, fn: Callable[P, T], /, *args: P.args, **kwargs: P.kwargs) -> Future[T]:
        """Run the job and return a Future"""
        fut: Future[T] = Future()
        try:
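The only change here is the `/` marker, which makes `fn` positional-only and matches the `concurrent.futures.Executor.submit(fn, /, *args, **kwargs)` signature used since Python 3.9. A minimal sketch of the effect (mine):

```python
def submit(fn, /, *args, **kwargs):
    # fn is positional-only: it can never collide with a keyword
    # argument literally named "fn" that the caller wants to forward.
    return fn(*args, **kwargs)

print(submit(dict, fn="value"))  # {'fn': 'value'} - works only because fn is positional-only
```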
@@ -57,15 +57,11 @@ except ImportError:
    # in versions of Python>=3.10.
    UnionType = Never

-if sys.version_info[:3] >= (3, 9, 0):
-    from typing import _SpecialGenericAlias, _GenericAlias  # type: ignore[attr-defined]
-    from types import GenericAlias  # type: ignore[attr-defined]
-
-    typingGenericAlias: Tuple[Any, ...] = (_GenericAlias, _SpecialGenericAlias, GenericAlias)
-else:
-    from typing import _GenericAlias  # type: ignore[attr-defined]
-
-    typingGenericAlias = (_GenericAlias,)
+from typing import _SpecialGenericAlias, _GenericAlias  # type: ignore[attr-defined]
+from types import GenericAlias
+
+typingGenericAlias: Tuple[Any, ...] = (_GenericAlias, _SpecialGenericAlias, GenericAlias)

from dlt.common.pendulum import timedelta, pendulum
@@ -334,7 +330,7 @@ def is_typeddict(t: Type[Any]) -> bool:

def is_annotated(ann_type: Any) -> bool:
    try:
-        return issubclass(get_origin(ann_type), Annotated)  # type: ignore[arg-type]
+        return get_origin(ann_type) is Annotated
    except TypeError:
        return False
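The identity check works because `get_origin()` returns the special form `Annotated` itself for annotated aliases, so no `issubclass` call (which can raise for non-class origins) is needed. A small sketch of the behavior the new code relies on (mine):

```python
from typing import Annotated, List, get_origin

print(get_origin(Annotated[int, "meta"]) is Annotated)  # True
print(get_origin(List[int]) is Annotated)               # False - origin is list
print(get_origin(int) is Annotated)                     # False - origin is None
```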
@@ -28,8 +28,7 @@ from dlt.common.storages.fsspec_filesystem import (
 from dlt.common.schema import TColumnSchema, Schema
 from dlt.common.schema.typing import TColumnType
 from dlt.common.storages import FilesystemConfiguration, fsspec_from_config

-from dlt.destinations.insert_job_client import InsertValuesJobClient
 from dlt.destinations.job_client_impl import SqlJobClientWithStagingDataset
 from dlt.destinations.exceptions import LoadJobTerminalException
 from dlt.destinations.impl.databricks.configuration import DatabricksClientConfiguration
 from dlt.destinations.impl.databricks.sql_client import DatabricksSqlClient
@@ -198,7 +197,7 @@ class DatabricksMergeJob(SqlMergeFollowupJob):
    """


-class DatabricksClient(InsertValuesJobClient, SupportsStagingDestination):
+class DatabricksClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
    def __init__(
        self,
        schema: Schema,
@@ -213,7 +212,7 @@ class DatabricksClient(InsertValuesJobClient, SupportsStagingDestination):
        )
        super().__init__(schema, config, sql_client)
        self.config: DatabricksClientConfiguration = config
-        self.sql_client: DatabricksSqlClient = sql_client
+        self.sql_client: DatabricksSqlClient = sql_client  # type: ignore[assignment, unused-ignore]
        self.type_mapper = self.capabilities.get_type_mapper()

    def create_load_job(
@@ -16,7 +16,7 @@ from typing import (
 )

 from databricks.sdk.core import Config, oauth_service_principal
-from databricks import sql as databricks_lib  # type: ignore[attr-defined]
+from databricks import sql as databricks_lib
 from databricks.sql.client import (
     Connection as DatabricksSqlConnection,
     Cursor as DatabricksSqlCursor,
@@ -43,7 +43,7 @@ from dlt.common.destination.reference import DBApiCursor
 class DatabricksCursorImpl(DBApiCursorImpl):
    """Use native data frame support if available"""

-    native_cursor: DatabricksSqlCursor
+    native_cursor: DatabricksSqlCursor  # type: ignore[assignment, unused-ignore]
    vector_size: ClassVar[int] = 2048  # vector size is 2048

    def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]:
@@ -140,7 +140,6 @@ class DatabricksSqlClient(SqlClientBase[DatabricksSqlConnection], DBTransaction)
    @contextmanager
    @raise_database_error
    def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DBApiCursor]:
-        curr: DBApiCursor
        # TODO: Inline param support will be dropped in future databricks driver, switch to :named paramstyle
        # This will drop support for cluster runtime v13.x
        # db_args: Optional[Dict[str, Any]]
@@ -159,10 +158,11 @@ class DatabricksSqlClient(SqlClientBase[DatabricksSqlConnection], DBTransaction)
        # else:
        #     db_args = kwargs or None

+        assert isinstance(query, str)
        db_args = args or kwargs or None
        with self._conn.cursor() as curr:
            curr.execute(query, db_args)
-            yield DatabricksCursorImpl(curr)  # type: ignore[abstract]
+            yield DatabricksCursorImpl(curr)  # type: ignore[arg-type, abstract, unused-ignore]

    def catalog_name(self, escape: bool = True) -> Optional[str]:
        catalog = self.capabilities.casefold_identifier(self.credentials.catalog)
@@ -536,7 +536,7 @@ class LanceDBClient(JobClientBase, WithStateSync):
                self.schema.naming.normalize_identifier(
                    "engine_version"
                ): self.schema.ENGINE_VERSION,
-                self.schema.naming.normalize_identifier("inserted_at"): str(pendulum.now()),
+                self.schema.naming.normalize_identifier("inserted_at"): pendulum.now(),
                self.schema.naming.normalize_identifier("schema_name"): self.schema.name,
                self.schema.naming.normalize_identifier(
                    "version_hash"
@@ -693,7 +693,7 @@ class LanceDBClient(JobClientBase, WithStateSync):
                self.schema.naming.normalize_identifier("load_id"): load_id,
                self.schema.naming.normalize_identifier("schema_name"): self.schema.name,
                self.schema.naming.normalize_identifier("status"): 0,
-                self.schema.naming.normalize_identifier("inserted_at"): str(pendulum.now()),
+                self.schema.naming.normalize_identifier("inserted_at"): pendulum.now(),
                self.schema.naming.normalize_identifier("schema_version_hash"): None,
            }
        ]
@@ -24,12 +24,15 @@ from dlt.extract.items import TTableHintTemplate

 try:
     from dlt.common.libs import pyarrow
-    from dlt.common.libs.numpy import numpy
     from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem
     from dlt.common.libs.pyarrow import from_arrow_scalar, to_arrow_scalar
 except MissingDependencyException:
     pa = None
     pyarrow = None

+try:
+    from dlt.common.libs.numpy import numpy
+except MissingDependencyException:
+    numpy = None

 # NOTE: always import pandas independently from pyarrow
@@ -323,7 +326,9 @@ class ArrowIncremental(IncrementalTransform):
        """Creates unique index if necessary."""
        # create unique index if necessary
        if self._dlt_index not in tbl.schema.names:
-            tbl = pyarrow.append_column(tbl, self._dlt_index, pa.array(numpy.arange(tbl.num_rows)))
+            # indices = pa.compute.sequence(start=0, step=1, length=tbl.num_rows,
+            indices = pa.array(range(tbl.num_rows))
+            tbl = pyarrow.append_column(tbl, self._dlt_index, indices)
        return tbl

    def __call__(
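The index column is now built with plain `range()` instead of `numpy.arange()`, so numpy stays an optional dependency. A standalone sketch of the same idea using only pyarrow (mine, simplified from the hunk above):

```python
import pyarrow as pa

tbl = pa.table({"value": ["a", "b", "c"]})

# pa.array accepts any Python iterable, so no numpy is needed for a row index
indices = pa.array(range(tbl.num_rows))
tbl = tbl.append_column("_dlt_index", indices)

print(tbl.column("_dlt_index").to_pylist())  # [0, 1, 2]
```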
@@ -1,9 +1,9 @@
 import inspect
 import ast
-from ast import NodeVisitor
+from ast import NodeVisitor, unparse
 from typing import Any, Dict, List

-from dlt.common.reflection.utils import find_outer_func_def, ast_unparse
+from dlt.common.reflection.utils import find_outer_func_def

 import dlt.reflection.names as n
@@ -67,9 +67,9 @@ class PipelineScriptVisitor(NodeVisitor):
        for deco in node.decorator_list:
            # decorators can be function calls, attributes or names
            if isinstance(deco, (ast.Name, ast.Attribute)):
-                alias_name = ast_unparse(deco).strip()
+                alias_name = ast.unparse(deco).strip()
            elif isinstance(deco, ast.Call):
-                alias_name = ast_unparse(deco.func).strip()
+                alias_name = ast.unparse(deco.func).strip()
            else:
                raise ValueError(
                    self.source_segment(deco), type(deco), "Unknown decorator form"
@@ -86,7 +86,7 @@ class PipelineScriptVisitor(NodeVisitor):
    def visit_Call(self, node: ast.Call) -> Any:
        if self._curr_pass == 2:
            # check if this is a call to any of known functions
-            alias_name = ast_unparse(node.func).strip()
+            alias_name = ast.unparse(node.func).strip()
            fn = self.func_aliases.get(alias_name)
            if not fn:
                # try a fallback to "run" function that may be called on pipeline or source
@@ -1,7 +1,7 @@
 """Generic API Source"""
 from copy import deepcopy
 from typing import Any, Dict, List, Optional, Generator, Callable, cast, Union
-import graphlib  # type: ignore[import,unused-ignore]
+import graphlib
 from requests.auth import AuthBase

 import dlt
@@ -229,7 +229,7 @@ def rest_api_resources(config: RESTAPIConfig) -> List[DltResource]:

 def create_resources(
     client_config: ClientConfig,
-    dependency_graph: graphlib.TopologicalSorter,
+    dependency_graph: graphlib.TopologicalSorter,  # type: ignore[type-arg]
     endpoint_resource_map: Dict[str, Union[EndpointResource, DltResource]],
     resolved_param_map: Dict[str, Optional[List[ResolvedParam]]],
 ) -> Dict[str, DltResource]:
@@ -12,7 +12,7 @@ from typing import (
     cast,
     NamedTuple,
 )
-import graphlib  # type: ignore[import,unused-ignore]
+import graphlib
 import string
 from requests import Response

@@ -277,10 +277,12 @@ def make_parent_key_name(resource_name: str, field_name: str) -> str:
 def build_resource_dependency_graph(
     resource_defaults: EndpointResourceBase,
     resource_list: List[Union[str, EndpointResource, DltResource]],
-) -> Tuple[
-    Any, Dict[str, Union[EndpointResource, DltResource]], Dict[str, Optional[List[ResolvedParam]]]
+) -> Tuple[  # type: ignore[type-arg]
+    graphlib.TopologicalSorter,
+    Dict[str, Union[EndpointResource, DltResource]],
+    Dict[str, Optional[List[ResolvedParam]]],
 ]:
-    dependency_graph = graphlib.TopologicalSorter()
+    dependency_graph: graphlib.TopologicalSorter = graphlib.TopologicalSorter()  # type: ignore[type-arg]
     resolved_param_map: Dict[str, Optional[List[ResolvedParam]]] = {}
     endpoint_resource_map = expand_and_index_resources(resource_list, resource_defaults)
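The `# type: ignore[import,unused-ignore]` comments could go because `graphlib` is in the standard library on every supported interpreter (3.9+), which is also why the `graphlib-backport` dependency disappears from pyproject.toml below. A tiny standalone sketch of the `TopologicalSorter` API the REST API source relies on (mine):

```python
import graphlib

# resources mapped to the resources whose results they depend on
dependency_graph: graphlib.TopologicalSorter = graphlib.TopologicalSorter()
dependency_graph.add("issues")               # no dependencies
dependency_graph.add("comments", "issues")   # comments resolve params from issues

print(list(dependency_graph.static_order()))  # ['issues', 'comments']
```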
mypy.ini (15 lines changed)
@@ -1,5 +1,5 @@
[mypy]
-python_version=3.8
+python_version=3.9
ignore_missing_imports=false
strict_optional=false
warn_redundant_casts=true
@@ -24,9 +24,6 @@ disallow_untyped_defs=false
[mypy-jsonpath_ng.*]
ignore_missing_imports=true

-[mypy-astunparse.*]
-ignore_missing_imports=true
-
[mypy-google.oauth2.*]
ignore_missing_imports=true

@@ -89,6 +86,9 @@ ignore_missing_imports=true
[mypy-pandas.*]
ignore_missing_imports=true

+[mypy-numpy.*]
+ignore_missing_imports=true
+
[mypy-apiclient.*]
ignore_missing_imports=true

@@ -101,8 +101,10 @@ ignore_missing_imports=true
[mypy-connectorx]
ignore_missing_imports=true

+[mypy-s3fs.*]
+ignore_missing_imports=true
+
[mypy-win_precise_time]
ignore_missing_imports=true

@@ -121,6 +123,9 @@ ignore_missing_imports = True
[mypy-pytz.*]
ignore_missing_imports = True

+[mypy-sentry_sdk.*]
+ignore_missing_imports = True
+
[mypy-tornado.*]
ignore_missing_imports = True

@@ -130,7 +135,7 @@ ignore_missing_imports = True
[mypy-snowflake.*]
ignore_missing_imports = True

-[mypy-backports.*]
+[mypy-pendulum.*]
ignore_missing_imports = True

[mypy-time_machine.*]
poetry.lock (2510 lines changed; generated file, diff suppressed because it is too large)
@@ -9,11 +9,16 @@ license = "Apache-2.0"
homepage = "https://github.com/dlt-hub"
repository = "https://github.com/dlt-hub/dlt"
classifiers = [
-    "Development Status :: 4 - Beta",
+    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Topic :: Software Development :: Libraries",
    "Typing :: Typed",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX :: Linux",
    "Operating System :: Microsoft :: Windows",]
@@ -24,9 +29,13 @@ packages = [
]

[tool.poetry.dependencies]
-python = ">=3.8.1,<3.13"
+python = ">=3.9,<3.14"
requests = ">=2.26.0"
-pendulum = ">=2.1.2"
+pendulum = {version = ">=2.1.2", markers = "python_version < '3.13'"}
+# NOTE: revert to normal pendulum once it supports python 3.13
+dlt-pendulum = [
+    {version="3.0.1a1", allow-prereleases = true, markers = "python_version >= '3.13'"}
+]
simplejson = ">=3.17.5"
PyYAML = ">=5.4.1"
semver = ">=3.0.0"
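The dependency table now leans heavily on PEP 508 environment markers (`python_version < '3.13'` and friends) to swap stock pendulum for the dlt-pendulum fork and to pick compatible pyarrow, databricks-sql-connector, lancedb and numpy builds. A small sketch (mine, using the `packaging` library that dlt already depends on) of how such markers evaluate on the running interpreter:

```python
from packaging.markers import Marker

# PEP 508 marker strings like the ones used in the dependency specs above
needs_fork = Marker("python_version >= '3.13'")
stock_pendulum = Marker("python_version < '3.13'")

print(needs_fork.evaluate())      # True only on Python 3.13+
print(stock_pendulum.evaluate())  # True on Python 3.9-3.12
```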
@@ -40,7 +49,6 @@ click = ">=7.1"
requirements-parser = ">=0.5.0"
setuptools = ">=65.6.0"
humanize = ">=4.4.0"
-astunparse = { "version" = ">=1.6.3", "python" = "<3.9"}
gitpython = ">=3.1.29"
pytz = ">=2022.6"
giturlparse = ">=0.10.0"
@@ -52,7 +60,6 @@ fsspec = ">=2022.4.0"
packaging = ">=21.1"
pluggy = ">=1.3.0"
win-precise-time = {version = ">=1.4.2", markers="os_name == 'nt'"}
-graphlib-backport = {version = "*", python = "<3.9"}

psycopg2-binary = {version = ">=2.9.1", optional = true}
# use this dependency as the current version of psycopg2cffi does not have sql module
@@ -60,11 +67,14 @@ psycopg2-binary = {version = ">=2.9.1", optional = true}
psycopg2cffi = {version = ">=2.9.0", optional = true, markers="platform_python_implementation == 'PyPy'"}
grpcio = {version = ">=1.50.0", optional = true}
google-cloud-bigquery = {version = ">=2.26.0", optional = true}
-pyarrow = {version = ">=12.0.0", optional = true}
+pyarrow = [
+    {version = ">=12.0.0,<18", markers = "python_version >= '3.9' and python_version < '3.13'", optional = true},
+    {version = ">=18.0.0", markers = "python_version >= '3.13'", optional = true}
+]
duckdb = {version = ">=0.9", optional = true}
# keep per-python version dependency as a reference
# duckdb = [
-#     {version = ">=0.6.1,<0.10.0", python = ">=3.8,<3.12", optional = true},
+#     {version = ">=0.6.1,<0.10.0", python = ">=3.9,<3.12", optional = true},
#     {version = ">=0.10.0,<0.11.0", python = ">=3.12", optional = true}
# ]
s3fs = {version = ">=2022.4.0", optional = true}
@@ -78,10 +88,13 @@ weaviate-client = {version = ">=3.22", optional = true}
adlfs = {version = ">=2024.7.0", optional = true}
pyodbc = {version = ">=4.0.39", optional = true}
qdrant-client = {version = ">=1.8", optional = true, extras = ["fastembed"]}
-databricks-sql-connector = {version = ">=2.9.3", optional = true}
+databricks-sql-connector = [
+    {version = ">=2.9.3,<4", optional = true, markers = "python_version <= '3.12'"},
+    {version = ">=3.6.0", optional = true, markers = "python_version >= '3.13'"},
+]
clickhouse-driver = { version = ">=0.2.7", optional = true }
clickhouse-connect = { version = ">=0.7.7", optional = true }
-lancedb = { version = ">=0.8.2", optional = true, markers = "python_version >= '3.9'", allow-prereleases = true }
+lancedb = { version = ">=0.8.2", optional = true, markers = "python_version < '3.13'", allow-prereleases = true }
tantivy = { version = ">= 0.22.0", optional = true }
deltalake = { version = ">=0.21.0", optional = true }
sqlalchemy = { version = ">=1.4", optional = true }
@@ -94,8 +107,9 @@ db-dtypes = { version = ">=1.2.0", optional = true }
# https://github.com/apache/airflow/issues/28723
# pyiceberg = { version = ">=0.7.1", optional = true, extras = ["sql-sqlite"] }
# we will rely on manual installation of `sqlalchemy>=2.0.18` instead
-pyiceberg = { version = ">=0.8.1", python = ">=3.9", optional = true }
+pyiceberg = { version = ">=0.8.1", optional = true }
databricks-sdk = {version = ">=0.38.0", optional = true}
pywin32 = {version = ">=306", optional = true, platform = "win32"}

[tool.poetry.extras]
gcp = ["grpcio", "google-cloud-bigquery", "db-dtypes", "gcsfs"]
@@ -132,10 +146,10 @@ postgis = ["psycopg2-binary", "psycopg2cffi"]
dlt = "dlt.cli._dlt:_main"

[tool.poetry.group.dev.dependencies]
-cffi = "^1.16"
-greenlet = "^3.0.3"
-regex = "^2023.10"
-pendulum = ">=3"
+cffi = ">=1.16"
+greenlet = ">=3.1"
+regex = ">=2024.10"
sqlalchemy = "<2"
requests-mock = "^1.10.0"
types-click = "^7.1.8"
sqlfluff = "^2.3.2"
@@ -143,7 +157,7 @@ types-deprecated = "^1.2.9.2"
pytest-console-scripts = "^1.4.1"
pytest = "^7.0.0"
mypy = ">=1.11.0,<1.13.0"
-flake8 = "^5.0.0"
+flake8 = "^7.0.0"
bandit = "^1.7.0"
black = "^23.7.0"
isort = "^5.12.0"
@@ -181,8 +195,8 @@ shapely = ">=2.0.6"
optional = true
[tool.poetry.group.sources.dependencies]
connectorx = [
-    {version = "0.3.2", python = "3.8"},
-    {version = ">=0.3.3", python = ">=3.9"}
+    {version = ">=0.3.3", python = ">=3.9"},
+    {version = ">=0.4.0", python = ">=3.10"}
]
pymysql = "^1.1.0"
openpyxl = "^3"
@@ -195,10 +209,11 @@ google-auth-oauthlib = "^1.0.0"
tqdm = "^4.65.0"
enlighten = "^1.11.2"
alive-progress = "^3.1.1"
-pydantic = ">2"
+pydantic = ">=2.10"
numpy = [
-    { version = ">=1.21", python = ">=3.8,<3.12" },
-    { version = ">=1.26", python = ">=3.12" }
+    { version = ">=1.21", python = ">=3.9,<3.12" },
+    { version = ">=1.26", python = ">=3.12" },
+    { version = ">=2.0.0", python = ">=3.13" }
]
pandas = [
    {version = ">2.1", markers = "python_version >= '3.12'"},
@@ -212,11 +227,12 @@ optional = true
[tool.poetry.group.airflow.dependencies]
apache-airflow = {version = "^2.8.0", markers = "python_version < '3.12'"}

-[tool.poetry.group.ibis]
-optional = true
+# TODO: restore when ibis allows pyarrow 18
+# [tool.poetry.group.ibis]
+#optional = true

-[tool.poetry.group.ibis.dependencies]
-ibis-framework = { version = ">=9.0.0,<10.0.0", markers = "python_version >= '3.10'", extras = ["duckdb", "postgres", "bigquery", "snowflake", "mssql", "clickhouse"]}
+#[tool.poetry.group.ibis.dependencies]
+# ibis-framework = { version = ">=9.0.0,<10.0.0", markers = "python_version >= '3.10'", extras = ["duckdb", "postgres", "bigquery", "snowflake", "mssql", "clickhouse"]}

[tool.poetry.group.providers]
optional = true
@@ -255,9 +271,14 @@ dbt-duckdb = ">=1.2.0"
pymongo = ">=4.3.3"
pandas = ">2"
alive-progress = ">=3.0.1"
-pyarrow = ">=17.0.0"
+pyarrow = [
+    {version = ">=17.0.0", markers = "python_version < '3.13'"},
+    {version = ">=18.0.0", markers = "python_version >= '3.13'"}
+]
psycopg2-binary = ">=2.9"
-lancedb = { version = ">=0.8.2", markers = "python_version >= '3.9'", allow-prereleases = true }
+lancedb = [
+    { version = ">=0.8.2", markers = "python_version < '3.13'", allow-prereleases = true }
+]
openai = ">=1.45"
connectorx = { version = ">=0.3.2" }
modal = ">=0.64.170"
@@ -276,3 +297,9 @@ multi_line_output = 3
[build-system]
requires = ["poetry-core>=1.0.8"]
build-backend = "poetry.core.masonry.api"
+
+# NOTE: needed for dlt-pendulum
+[[tool.poetry.source]]
+name = "dlt-pypi"
+url = "https://pypi.dlthub.com"
+priority = "supplemental"
@@ -1447,11 +1447,11 @@ def test_parallelized_resource_decorator() -> None:
        def some_tx_func(item):
            return list(range(item))

-        transformer = dlt.transformer(some_tx_func, parallelized=True, data_from=resource)
+        transformer = dlt.transformer(some_tx_func, data_from=resource)
        pipe_gen = transformer._pipe.gen
        inner = pipe_gen(3)  # type: ignore
        # this is a regular function returning list
-        assert inner() == [0, 1, 2]  # type: ignore[operator]
+        assert inner == [0, 1, 2]
        assert list(transformer) == [0, 0, 1, 0, 1, 2]

    # Invalid parallel resources
@@ -4,6 +4,7 @@ import pyarrow.parquet as pq
 import pytest
 import datetime  # noqa: 251
 import time
+import math

 from dlt.common import pendulum, Decimal, json
 from dlt.common.configuration import inject_section
@@ -164,10 +165,14 @@ def test_parquet_writer_size_file_rotation() -> None:
        for i in range(0, 100):
            writer.write_data_item([{"col1": i}], columns)

-    assert len(writer.closed_files) == 25
+    # different arrow version create different file sizes
+    no_files = len(writer.closed_files)
+    i_per_file = int(math.ceil(100 / no_files))
+    assert no_files >= 17 and no_files <= 25

    with open(writer.closed_files[4].file_path, "rb") as f:
        table = pq.read_table(f)
-        assert table.column("col1").to_pylist() == list(range(16, 20))
+        assert table.column("col1").to_pylist() == list(range(4 * i_per_file, 5 * i_per_file))


def test_parquet_writer_config() -> None:
@@ -348,7 +348,7 @@ def test_nested_model_config_propagation() -> None:
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
-    assert issubclass(type_origin, Annotated)  # type: ignore[arg-type]
+    assert type_origin is Annotated
    # UserAddress is converted to UserAddressAllow only once
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]
@@ -405,7 +405,7 @@ def test_nested_model_config_propagation_optional_with_pipe():
    assert model_freeze.__fields__["address"].annotation.__name__ == "UserAddressPipeExtraAllow"  # type: ignore[index]
    # annotated is preserved
    type_origin = get_origin(model_freeze.__fields__["address"].rebuild_annotation())  # type: ignore[index]
-    assert issubclass(type_origin, Annotated)  # type: ignore[arg-type]
+    assert type_origin is Annotated
    # UserAddress is converted to UserAddressAllow only once
    type_annotation = model_freeze.__fields__["address"].annotation  # type: ignore[index]
    assert type_annotation is get_args(model_freeze.__fields__["unity"].annotation)[0]  # type: ignore[index]
@@ -179,7 +179,7 @@ def test_loading_errors(client: InsertValuesJobClient, file_storage: FileStorage
        "failed",
        file_format="insert_values",
    )
-    assert type(job._exception) == DatabaseTerminalException  # type: ignore
+    assert type(job._exception) is DatabaseTerminalException  # type: ignore
    # numeric overflow on NUMERIC
    insert_sql = (
        "INSERT INTO {}(_dlt_id, _dlt_root_id, sender_id, timestamp,"
@@ -214,10 +214,10 @@ def test_loading_errors(client: InsertValuesJobClient, file_storage: FileStorage
        "failed",
        file_format="insert_values",
    )
-    assert type(job._exception) == DatabaseTerminalException  # type: ignore
+    assert type(job._exception) is DatabaseTerminalException  # type: ignore

    assert (
-        type(job._exception.dbapi_exception) == psycopg2.errors.InternalError_  # type: ignore
+        type(job._exception.dbapi_exception) is psycopg2.errors.InternalError_  # type: ignore
        if dtype == "redshift"
        else TNumericValueOutOfRange
    )
@@ -488,7 +488,7 @@ def test_extract_source_twice() -> None:
    dlt.pipeline().extract(s)
    with pytest.raises(PipelineStepFailed) as py_ex:
        dlt.pipeline().extract(s)
-    assert type(py_ex.value.exception) == SourceExhausted
+    assert type(py_ex.value.exception) is SourceExhausted
    assert py_ex.value.exception.source_name == "source"
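These test tweaks replace `type(x) == Y` with `type(x) is Y`, the identity comparison that the newer flake8 pinned above (rule E721) expects for exact type checks. A quick illustrative sketch (mine):

```python
class BaseError(Exception): ...
class TerminalError(BaseError): ...

err = TerminalError()

print(type(err) is TerminalError)   # True - exact type, E721-clean
print(type(err) is BaseError)       # False - subclasses are not matched
print(isinstance(err, BaseError))   # True - use isinstance when subclasses should match
```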
@@ -450,8 +450,8 @@ def test_skips_complex_fields_when_skip_nested_types_is_true_and_field_is_not_a_


 @pytest.mark.skipif(
-    importlib.util.find_spec("pandas") is not None,
-    reason="Test skipped because pandas IS installed",
+    importlib.util.find_spec("pandas") is not None or importlib.util.find_spec("numpy") is not None,
+    reason="Test skipped because pandas or numpy ARE installed",
 )
 def test_arrow_no_pandas() -> None:
     import pyarrow as pa
@@ -461,20 +461,32 @@ def test_arrow_no_pandas() -> None:
        "Strings": ["apple", "banana", "cherry", "date", "elderberry"],
    }

-    df = pa.table(data)
+    table = pa.table(data)

    @dlt.resource
    def pandas_incremental(numbers=dlt.sources.incremental("Numbers")):
-        yield df
+        yield table

    info = dlt.run(
-        pandas_incremental(), write_disposition="append", table_name="data", destination="duckdb"
+        pandas_incremental(), write_disposition="merge", table_name="data", destination="duckdb"
    )

+    # change table
+    data = {
+        "Numbers": [5, 6],
+        "Strings": ["elderberry", "burak"],
+    }
+
+    table = pa.table(data)
+
+    info = dlt.run(
+        pandas_incremental(), write_disposition="merge", table_name="data", destination="duckdb"
+    )
+
    with info.pipeline.sql_client() as client:  # type: ignore
        with client.execute_query("SELECT * FROM data") as c:
-            df = c.df()
+            with pytest.raises(ImportError):
+                c.df()


def test_empty_parquet(test_storage: FileStorage) -> None:
@@ -2,7 +2,7 @@ import re
 from copy import deepcopy

 import pytest
-from graphlib import CycleError  # type: ignore
+from graphlib import CycleError

 from dlt.sources.rest_api import (
     rest_api_resources,