migrate to uv (#2766)

* move pyproject.toml and Makefile from old branch and add in-between changes

* update workflow files to use uv

* run new version of formatter

* fix building of images with uv

* possibly fix docs linting

* downgrade lancedb dependency to fix tests

* fix gcs compat mode for s3 for newest boto

* fix docstrings in examples

* add some uv constraints

* update readme.md and contributing.md and some other places

* allow duckdb 0.8 in range

* add link-mode copy to uv venv on windows

* remove poetry lockfile and unneeded lockfile checker

* fix chess api related failures

* sleep after dremio start..

* set correct package in pyproject

* Revert "add some uv constraints"

This reverts commit d611e9ecce.

# Conflicts:
#	pyproject.toml
#	uv.lock

* add missing databricks sql connector version bounds
Author: David Scharf
Date: 2025-06-19 10:11:24 +02:00 (committed by GitHub)
Parent: b1cff8cc66
Commit: 3ebbfa1f9e
45 changed files with 10257 additions and 12619 deletions


@@ -14,7 +14,7 @@ jobs:
matrix:
os:
- ubuntu-latest
python-version: ["3.9.x", "3.10.x", "3.11.x", "3.12.x"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
defaults:
run:
@@ -27,28 +27,19 @@ jobs:
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
python-version: ${{ matrix.python-version }}
activate-environment: true
enable-cache: true
- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk,dbt,marimo,streamlit
run: uv sync --all-extras --group airflow --group providers --group pipeline --group sentry-sdk --group dbt --group marimo --group streamlit
- name: Run make lint
run: |
@@ -57,7 +48,7 @@ jobs:
- name: Check that cli docs are up to date
run: make check-cli-docs
if: ${{ matrix.python-version == '3.11.x' }}
if: ${{ matrix.python-version == '3.11' }}
matrix_job_required_check:
name: lint | code & tests
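To reproduce the lint job's environment locally, a minimal sketch (assuming uv is installed and you are in the repository root; the group names are taken from the workflow step above):

uv sync --all-extras --group airflow --group providers --group pipeline \
  --group sentry-sdk --group dbt --group marimo --group streamlit
make lint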


@@ -23,33 +23,33 @@ jobs:
# macos tests
- os: macos-latest
python-version: "3.11.x"
python-version: "3.11"
shell: bash
# linux tests
- os: ubuntu-latest
python-version: "3.9.x"
python-version: "3.9"
shell: bash
- os: ubuntu-latest
python-version: "3.10.x"
python-version: "3.10"
shell: bash
- os: ubuntu-latest
python-version: "3.11.x"
python-version: "3.11"
shell: bash
- os: ubuntu-latest
python-version: "3.12.x"
python-version: "3.12"
shell: bash
- os: ubuntu-latest
python-version: "3.13.x"
python-version: "3.13"
shell: bash
# windows tests
- os: windows-latest
python-version: "3.11.x"
python-version: "3.11"
shell: cmd
pytest_args: '-m "not forked"'
- os: windows-latest
python-version: "3.13.x"
python-version: "3.13"
shell: cmd
pytest_args: '-m "not forked"'
@@ -61,9 +61,8 @@ jobs:
steps:
- name: Check out
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@@ -77,75 +76,68 @@ jobs:
copy tzdata %USERPROFILE%\Downloads\tzdata
curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml --output %USERPROFILE%\Downloads\tzdata\windowsZones.xml
if: runner.os == 'Windows'
- name: Install Poetry
# https://github.com/snok/install-poetry#running-on-windows
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
python-version: ${{ matrix.python-version }}
activate-environment: true
- name: Install dependencies
run: poetry install --no-interaction --with sentry-sdk
run: uv sync --group sentry-sdk
- name: Run common tests with minimum dependencies
run: |
poetry run pytest tests/common tests/normalize tests/reflection tests/plugins tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py ${{ matrix.pytest_args }}
pytest tests/common tests/normalize tests/reflection tests/plugins tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py ${{ matrix.pytest_args }}
- name: Install duckdb dependencies
run: poetry install --no-interaction -E duckdb --with sentry-sdk
run: uv sync --extra duckdb --group sentry-sdk
- name: Run pipeline smoke tests with minimum deps
run: |
poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py ${{ matrix.pytest_args }}
pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py ${{ matrix.pytest_args }}
- name: Install pyarrow
run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk
run: uv sync --extra duckdb --extra cli --extra parquet --group sentry-sdk
- name: Run pipeline tests with pyarrow but no pandas installed
run: |
poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow ${{ matrix.pytest_args }}
pytest tests/pipeline/test_pipeline_extra.py -k arrow ${{ matrix.pytest_args }}
- name: Install pipeline and sources dependencies
run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources
run: uv sync --extra duckdb --extra cli --extra parquet --extra deltalake --extra sql_database --group sentry-sdk --group pipeline --group sources
- name: Run extract and pipeline tests
run: |
poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources tests/transformations ${{ matrix.pytest_args }}
pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources tests/transformations ${{ matrix.pytest_args }}
# here we upgrade sql alchemy to 2 and run the sql_database tests again
- name: Upgrade sql alchemy
run: poetry run pip install sqlalchemy==2.0.32
run: uv run pip install sqlalchemy==2.0.32
- name: Run extract and pipeline tests
run: |
poetry run pytest tests/sources/sql_database
pytest tests/sources/sql_database
# test marimo app, does not work with python 3.13
- name: Install dlt with duckdb and studio
run: poetry install --no-interaction -E duckdb --with sentry-sdk,pipeline,sources,ibis,marimo
if: matrix.python-version != '3.13.x'
run: uv sync --extra duckdb --group sentry-sdk --group pipeline --group sources --group ibis --group marimo
if: matrix.python-version != '3.13'
- name: Install playwright
run: poetry run playwright install
if: matrix.python-version != '3.13.x'
run: playwright install
if: matrix.python-version != '3.13'
# Run marimo studio unit tests
- name: Run marimo studio unit tests
run: |
poetry run pytest tests/helpers/studio
if: matrix.python-version != '3.13.x'
pytest tests/helpers/studio
if: matrix.python-version != '3.13'
# Run marimo e2e tests (does not pass with python 3.9, does not pass on windows (playwright does not work somehow), does not pass on python 3.13 (ibis not available))
- name: Run marimo e2e
run: |
poetry run marimo run --headless dlt/helpers/studio/app.py -- -- --pipelines_dir _storage/.dlt/pipelines/ --with_test_identifiers true & poetry run pytest --browser chromium tests/e2e
if: matrix.python-version != '3.13.x' && matrix.python-version != '3.9.x' && matrix.os != 'windows-latest'
marimo run --headless dlt/helpers/studio/app.py -- -- --pipelines_dir _storage/.dlt/pipelines/ --with_test_identifiers true & pytest --browser chromium tests/e2e
if: matrix.python-version != '3.13' && matrix.python-version != '3.9' && matrix.os != 'windows-latest'


@@ -26,26 +26,25 @@ jobs:
- name: filesystem, weaviate, qdrant
destinations: "[\"filesystem\", \"weaviate\", \"qdrant\"]"
filesystem_drivers: "[\"memory\", \"file\", \"sftp\"]"
extras: "parquet cli filesystem qdrant weaviate deltalake pyiceberg sftp"
extras: "--extra parquet --extra cli --extra filesystem --extra qdrant --extra weaviate --extra deltalake --extra pyiceberg --extra sftp"
needs_weaviate: true
needs_qdrant: true
needs_ftp: true
post_install_commands: "poetry run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
- name: postgres, duckdb and dummy with cli commands
destinations: "[\"postgres\", \"duckdb\", \"dummy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "postgres postgis parquet duckdb cli filesystem"
extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb --extra cli --extra filesystem"
needs_postgres: true
with: ",adbc"
additional_tests: "poetry run pytest tests/cli"
additional_tests: "pytest tests/cli"
# Clickhouse OSS (TODO: test with minio s3)
- name: clickhouse
destinations: "[\"clickhouse\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "clickhouse parquet"
extras: "--extra clickhouse --extra parquet"
needs_clickhouse: true
# NOTE: we only run non-staging tests, as staging tests require credentials for s3 and azure
excluded_destination_configurations: "[\"clickhouse-parquet-staging-s3-authorization\", \"clickhouse-parquet-staging-az-authorization\", \"clickhouse-jsonl-staging-az-authorization\", \"clickhouse-jsonl-staging-s3-authorization\"]"
@@ -54,7 +53,7 @@ jobs:
- name: dremio
destinations: "[\"dremio\"]"
filesystem_drivers: "[\"memory\"]"
extras: "s3 gs az parquet"
extras: "--extra s3 --extra gs --extra az --extra parquet"
needs_dremio: true
@@ -62,17 +61,17 @@ jobs:
- name: sqlalchemy
destinations: "[\"sqlalchemy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "sqlalchemy filesystem parquet"
extras: "--extra sqlalchemy --extra filesystem --extra parquet"
needs_mysql: true
post_install_commands: "poetry run pip install pymysql && poetry run pip install sqlalchemy==1.4"
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4"
# SQLAlchemy 2.0 (same as above but with sqlalchemy 2.0)
- name: sqlalchemy
destinations: "[\"sqlalchemy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "sqlalchemy filesystem parquet"
extras: "--extra sqlalchemy --extra filesystem --extra parquet"
needs_mysql: true
post_install_commands: "poetry run pip install pymysql && poetry run pip install sqlalchemy==2.0"
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==2.0"
env:
ACTIVE_DESTINATIONS: ${{ matrix.destinations }}
@@ -148,7 +147,7 @@ jobs:
if: ${{ matrix.needs_ftp }}
- name: Start dremio
run: docker compose -f "tests/load/dremio/docker-compose.yml" up -d
run: docker compose -f "tests/load/dremio/docker-compose.yml" up -d && sleep 30
if: ${{ matrix.needs_dremio }}
- run: |
@@ -164,20 +163,19 @@ jobs:
#
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Install dependencies
run: poetry install --no-interaction --with sentry-sdk,pipeline,ibis,providers${{ matrix.with }} --extras "${{ matrix.extras }}"
run: uv sync --group sentry-sdk --group pipeline --group ibis --group providers ${{ matrix.extras }}
- name: Copy secrets for local tests
run: |
@@ -191,7 +189,7 @@ jobs:
- name: Run tests Linux
run: |
eval "$(ssh-agent -s)"
poetry run pytest tests/load --ignore tests/load/sources --ignore tests/load/filesystem_sftp
pytest tests/load --ignore tests/load/sources --ignore tests/load/filesystem_sftp
- name: Run additional tests
run: ${{ matrix.additional_tests }}


@@ -36,75 +36,75 @@ jobs:
destinations: "[\"athena\"]"
filesystem_drivers: "[\"memory\"]"
excluded_destination_configurations: "[\"athena-parquet-iceberg-no-staging-iceberg\", \"athena-parquet-iceberg-staging-iceberg\"]"
extras: "athena"
extras: "--extra athena"
# Athena iceberg (NOTE: same as athena with different configs disabled)
- name: athena iceberg
destinations: "[\"athena\"]"
filesystem_drivers: "[\"memory\"]"
excluded_destination_configurations: "[\"athena-no-staging\", \"athena-parquet-no-staging\"]"
extras: "athena"
extras: "--extra athena"
# BigQuery
- name: bigquery
destinations: "[\"bigquery\"]"
filesystem_drivers: "[\"memory\"]"
extras: "bigquery parquet"
extras: "--extra bigquery --extra parquet"
# Clickhouse
- name: clickhouse
destinations: "[\"clickhouse\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "clickhouse parquet"
extras: "--extra clickhouse --extra parquet"
# Databricks
- name: databricks
destinations: "[\"databricks\"]"
filesystem_drivers: "[\"memory\"]"
extras: "databricks s3 gs az parquet"
extras: "--extra databricks --extra s3 --extra gs --extra az --extra parquet"
# Filesystem
- name: filesystem_s3_local
destinations: "[\"filesystem\"]"
# note that all buckets are enabled for testing
filesystem_drivers: "[\"memory\", \"file\", \"r2\", \"s3\"]" # excludes sftp which is run in local tests
extras: "s3 parquet duckdb filesystem deltalake pyiceberg"
post_install_commands: "poetry run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
extras: "--extra s3 --extra parquet --extra duckdb --extra filesystem --extra deltalake --extra pyiceberg"
post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
- name: filesystem_az
destinations: "[\"filesystem\"]"
# note that all buckets are enabled for testing
filesystem_drivers: "[\"memory\", \"az\", \"abfss\"]" # excludes sftp which is run in local tests
extras: "az parquet duckdb filesystem deltalake pyiceberg"
post_install_commands: "poetry run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
extras: "--extra az --extra parquet --extra duckdb --extra filesystem --extra deltalake --extra pyiceberg"
post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
- name: filesystem_gs_gdrive
destinations: "[\"filesystem\"]"
# note that all buckets are enabled for testing
filesystem_drivers: "[\"memory\", \"gs\", \"gdrive\"]" # excludes sftp which is run in local tests
extras: "gs parquet duckdb filesystem deltalake pyiceberg"
post_install_commands: "poetry run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
extras: "--extra gs --extra parquet --extra duckdb --extra filesystem --extra deltalake --extra pyiceberg"
post_install_commands: "uv run pip install sqlalchemy==2.0.18" # minimum version required by `pyiceberg`
# LanceDB
- name: lancedb
destinations: "[\"lancedb\"]"
filesystem_drivers: "[\"memory\"]"
extras: "lancedb parquet"
post_install_commands: "poetry run pip install openai"
extras: "--extra lancedb --extra parquet"
post_install_commands: "uv run pip install openai"
# Motherduck
- name: motherduck
destinations: "[\"motherduck\"]"
filesystem_drivers: "[\"memory\"]"
extras: "motherduck s3 gs az parquet"
extras: "--extra motherduck --extra s3 --extra gs --extra az --extra parquet"
# MSSQL
- name: mssql
destinations: "[\"mssql\"]"
filesystem_drivers: "[\"memory\"]"
extras: "mssql s3 gs az parquet"
extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet"
pre_install_commands: "sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18"
always_run_all_tests: true
@@ -112,34 +112,32 @@ jobs:
- name: synapse
destinations: "[\"synapse\"]"
filesystem_drivers: "[\"memory\"]"
extras: "synapse parquet"
extras: "--extra synapse --extra parquet"
pre_install_commands: "sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18"
# Postgres and Redshift (used to be test_destinations.yml)
- name: redshift
destinations: "[\"redshift\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "postgres redshift postgis s3 gs az parquet duckdb"
with: ",adbc"
extras: "--group adbc --extra postgres --extra redshift --extra postgis --extra s3 --extra gs --extra az --extra parquet --extra duckdb"
- name: postgres
destinations: "[\"postgres\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "postgres postgis parquet duckdb"
with: ",adbc"
extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb"
always_run_all_tests: true
# Qdrant (disabled, because we do not have a test account atm, qdrant is tested with local version)
# - name: qdrant
# destinations: "[\"qdrant\"]"
# filesystem_drivers: "[\"memory\"]"
# extras: "qdrant parquet"
# extras: "--extra qdrant --extra parquet"
# Snowflake
- name: snowflake
destinations: "[\"snowflake\"]"
filesystem_drivers: "[\"memory\"]"
extras: "snowflake s3 gs az parquet"
extras: "--extra snowflake --extra s3 --extra gs --extra az --extra parquet"
env:
ACTIVE_DESTINATIONS: ${{ matrix.destinations }}
@@ -159,24 +157,23 @@ jobs:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Run pre install commands
run: ${{ matrix.pre_install_commands }}
if: ${{ matrix.pre_install_commands }}
- name: Install dependencies
run: poetry install --no-interaction --with sentry-sdk,pipeline,ibis,providers${{ matrix.with }} --extras "${{ matrix.extras }}"
run: uv sync --group sentry-sdk --group pipeline --group ibis --group providers ${{ matrix.extras }}
- name: Run post install commands
run: ${{ matrix.post_install_commands }}
@@ -187,12 +184,12 @@ jobs:
# NOTE: essential tests are always run
- run: |
poetry run pytest tests/load --ignore tests/load/sources -m "essential"
pytest tests/load --ignore tests/load/sources -m "essential"
name: Run essential tests Linux
# NOTE: non-essential tests are run if the full test suite is requested or if the matrix item has always_run_all_tests set to true
# we want to run this step even if the essential tests fail
- run: |
poetry run pytest tests/load --ignore tests/load/sources -m "not essential"
pytest tests/load --ignore tests/load/sources -m "not essential"
name: Run non-essential tests Linux
if: ${{ always() && (inputs.run_full_test_suite || matrix.always_run_all_tests) }}


@@ -54,40 +54,31 @@ jobs:
- name: Start weaviate
run: docker compose -f "tests/load/weaviate/docker-compose.yml" up -d
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10.x"
- name: Setup node 20
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Setup Python
uses: actions/setup-python@v5
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
python-version: "3.10"
# - name: Load cached venv
# id: cached-poetry-dependencies
# uses: actions/cache@v3
# with:
# path: .venv
# key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Install dlt-plus nightly devel build without cache
run: poetry run pip install --upgrade --force-reinstall --no-cache-dir --pre dlt-plus
run: uv run pip install --upgrade --force-reinstall --no-cache-dir --pre dlt-plus
- name: run docs preprocessor
run: make preprocess-docs
- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery -E postgres -E lancedb --with docs,sentry-sdk,ibis --without airflow -E s3
run: uv sync --extra duckdb --extra weaviate --extra parquet --extra qdrant --extra bigquery --extra postgres --extra lancedb --group docs --group sentry-sdk --group ibis --extra s3
- name: create secrets.toml for examples
run: pwd && echo "$DLT_SECRETS_TOML" > docs/examples/.dlt/secrets.toml
@@ -95,10 +86,6 @@ jobs:
- name: create secrets.toml for snippets
run: pwd && echo "$DLT_SECRETS_TOML" > docs/website/docs/.dlt/secrets.toml
# NOTE: there seems to be a conflict between pydoclint and pydoc-markdown dependencies
- name: Force uninstall pydoclint
run: poetry run pip uninstall pydoclint -y
- name: Run linter and tests on snippets
run: make lint-and-test-snippets


@@ -18,18 +18,18 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.10.x", "3.11.x", "3.12.x"]
python-version: ["3.10", "3.11", "3.12"]
plus_dep: ["dlt-plus", "https://dlt-packages.fra1.digitaloceanspaces.com/dlt-plus/dlt_plus-0.0.0+nightly-py3-none-any.whl"]
# Test all python versions on ubuntu only
exclude:
- os: "macos-latest"
python-version: "3.10.x"
python-version: "3.10"
- os: "macos-latest"
python-version: "3.12.x"
python-version: "3.12"
- os: "windows-latest"
python-version: "3.10.x"
python-version: "3.10"
- os: "windows-latest"
python-version: "3.12.x"
python-version: "3.12"
defaults:
run:
@@ -41,18 +41,16 @@ jobs:
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
# https://github.com/snok/install-poetry#running-on-windows
uses: snok/install-poetry@v1.3.2
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
activate-environment: true
enable-cache: true
# NOTE: needed for mssql source tests in plus
- name: Install ODBC driver for SQL Server
@@ -60,28 +58,18 @@ jobs:
sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18
if: matrix.os == 'ubuntu-latest'
# NOTE: do not cache. we want to have a clean state each run and we upgrade dependencies later
# - name: Load cached venv
# id: cached-poetry-dependencies
# uses: actions/cache@v3
# with:
# # path: ${{ steps.pip-cache.outputs.dir }}
# path: .venv
# key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install all dependencies
run: make dev
- name: Install dlt-plus nightly devel build without cache
run: poetry run pip install --upgrade --force-reinstall --no-cache-dir ${{ matrix.plus_dep }}
run: uv run pip install --upgrade --force-reinstall --no-cache-dir ${{ matrix.plus_dep }}
- name: Run tests
run: poetry run pytest tests/plus
run: pytest tests/plus
if: matrix.os == 'ubuntu-latest'
- name: Run tests on mac on win without mssql driver
run: poetry run pytest tests/plus -m "not mssql"
run: pytest tests/plus -m "not mssql"
if: matrix.os == 'macos-latest' || matrix.os == 'windows-latest'
matrix_job_required_check:


@@ -49,40 +49,32 @@ jobs:
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-sources
python-version: "3.10"
activate-environment: true
enable-cache: true
# TODO: which deps should we enable?
- name: Install dependencies
run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E sql_database --with sentry-sdk,pipeline,sources,adbc
run: uv sync --extra postgres --extra postgis --extra duckdb --extra parquet --extra filesystem --extra cli --extra sql_database --group sentry-sdk --group pipeline --group sources --group adbc
- name: Copy secrets for local tests
run: |
cp tests/.dlt/dev.secrets.toml tests/.dlt/secrets.toml
# run sources tests in load against configured destinations
- run: poetry run pytest tests/load/sources
- run: pytest tests/load/sources
name: Run tests Linux
# here we upgrade sql alchemy to 2 and run the sql_database tests again
- name: Upgrade sql alchemy
run: poetry run pip install sqlalchemy==2.0.32
run: uv run pip install sqlalchemy==2.0.32
- run: poetry run pytest tests/load/sources/sql_database
- run: pytest tests/load/sources/sql_database
name: Run tests Linux


@@ -14,28 +14,20 @@ jobs:
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-airflow-runner
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Install dependencies
run: poetry install --no-interaction --with airflow --with pipeline -E duckdb -E parquet --with sentry-sdk
run: uv sync --group airflow --group pipeline --extra duckdb --extra parquet --group sentry-sdk
- run: |
poetry run pytest tests/helpers/airflow_tests
pytest tests/helpers/airflow_tests
name: Run tests


@@ -14,17 +14,16 @@ jobs:
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Build images
run: make test-build-images


@@ -28,31 +28,22 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry without dbt
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
# path: ${{ steps.pip-cache.outputs.dir }}
path: .venv
key: venv-${{ matrix.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-dbt-cloud
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Install dependencies
# install dlt with postgres support
run: poetry install --no-interaction
run: uv sync
- run: |
poetry run pytest tests/helpers/dbt_cloud_tests -k '(not venv)'
pytest tests/helpers/dbt_cloud_tests -k '(not venv)'
name: Run dbt cloud - Linux/MAC


@@ -26,41 +26,32 @@ jobs:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10.x"
python-version: "3.10"
- name: Install Poetry without dbt
uses: snok/install-poetry@v1.3.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
version: 1.8.5
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
# path: ${{ steps.pip-cache.outputs.dir }}
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-dbt-runner
python-version: "3.10"
activate-environment: true
enable-cache: true
- name: Install dependencies
# install dlt with postgres support
run: poetry install --no-interaction -E postgres -E postgis --with sentry-sdk,dbt
run: uv sync --extra postgres --extra postgis --group sentry-sdk --group dbt
- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
- run: |
poetry run pytest tests/helpers/dbt_tests -k '(not venv)'
pytest tests/helpers/dbt_tests -k '(not venv)'
name: Run dbt tests
- name: Remove dbt-core
# install dlt with postgres support
run: poetry run pip uninstall dbt-core -y
run: uv run pip uninstall dbt-core -y
- run: |
poetry run pytest tests/helpers/dbt_tests --ignore=tests/helpers/dbt_tests/local -k '(not local)'
pytest tests/helpers/dbt_tests --ignore=tests/helpers/dbt_tests/local -k '(not local)'
name: Run dbt runner with venv - Linux/MAC

.gitignore vendored

@@ -1,5 +1,5 @@
# temp requirements generated by poetry
# temp requirements generated by uv
_gen_requirements*.txt
_storage
**/_storage


@@ -36,9 +36,9 @@ Thank you for considering contributing to **dlt**! We appreciate your help in ma
To get started, follow these steps:
1. Fork the `dlt` repository and clone it to your local machine.
2. Install `poetry` with `make install-poetry` (or follow the [official instructions](https://python-poetry.org/docs/#installation)).
2. Install `uv` with `make install-uv` (or follow the [official instructions](https://docs.astral.sh/uv/getting-started/installation/)).
3. Run `make dev` to install all dependencies including dev ones.
4. Start working in the `poetry` shell by executing `poetry shell`.
4. Activate your venv with `make shell` and start working, or prepend all commands with `uv run` to run them within the uv environment. `uv run` is encouraged as it will automatically keep your project dependencies up to date.
## Submitting Changes
@@ -149,7 +149,7 @@ We'll provide you with access to the resources above if you wish to test locally
## Local Development
Use Python 3.9 for development, as it's the lowest supported version for `dlt`. You'll need `distutils` and `venv`. You may also use `pyenv`, as suggested by [poetry](https://python-poetry.org/docs/managing-environments/).
Use Python 3.9 for development, as it's the lowest supported version for `dlt`. You can select (and, if needed, download) the Python version you need with `uv venv --python 3.11.6`; see the [uv Python version docs](https://docs.astral.sh/uv/concepts/python-versions/#managed-and-system-python-installations).
## Publishing (Maintainers Only)
@@ -157,28 +157,28 @@ This section is intended for project maintainers who have the necessary permissi
Please read how we [version the library](README.md#adding-as-dependency) first.
The source of truth for the current version is `pyproject.toml`, and we use `poetry` to manage it.
The source of truth for the current version is `pyproject.toml`, and we use `uv` to manage it.
### Regular release
Before publishing a new release, make sure to bump the project's version accordingly:
1. Check out the **devel** branch.
2. Use `poetry version patch` to increase the **patch** version
2. Use `uv version --bump patch` to increase the **patch** version. You can also bump to `minor` or `major`.
3. Run `make build-library` to apply the changes to the project.
4. Create a new branch, and submit the PR to **devel**. Go through the standard process to merge it.
5. Create a merge PR from `devel` to `master` and merge it with a merge commit.
### Hotfix release
1. Check out the **master** branch
2. Use `poetry version patch` to increase the **patch** version
2. Use `uv version --bump patch` to increase the **patch** version
3. Run `make build-library` to apply the changes to the project.
4. Create a new branch, submit the PR to **master** and merge it.
### Pre-release
Occasionally we may release an alpha version directly from the **branch**.
1. Check out the **devel** branch
2. Use `poetry version prerelease` to increase the **alpha** version
2. You need to manually update the alpha version in the `pyproject.toml` file and run `uv sync` to update the uv lockfile.
3. Run `make build-library` to apply the changes to the project.
4. Create a new branch, and submit the PR to **devel** and merge it.
@@ -187,15 +187,16 @@ Occasionally we may release an alpha version directly from the **branch**.
Once the version has been bumped, follow these steps to publish the new release to PyPI:
1. Ensure that you are on the **master** branch and have the latest code that has passed all tests on CI.
2. Verify the current version with `poetry version`.
3. Obtain a PyPI access token and configure it with `poetry config pypi-token.pypi your-api-token`.
4. Run `make publish-library` to publish the new version.
2. Verify the current version with `uv version`.
3. Obtain a PyPI access token.
4. Build the library with `make build-library`, then run `uv publish --token "$PYPI_API_TOKEN"` to publish the new version.
5. Create a release on GitHub, using the version and git tag as the release name.
## Resources
- [dlt Docs](https://dlthub.com/docs)
- [Poetry Documentation](https://python-poetry.org/docs/)
- [uv Documentation](https://docs.astral.sh/uv/)
If you have any questions or need help, don't hesitate to reach out to us. We're here to help you succeed in contributing to `dlt`. Happy coding!
****
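A condensed sketch of the uv-based contributor workflow described in the CONTRIBUTING.md and Makefile changes above (assuming a fresh clone and no active virtualenv):

make install-uv                # or: curl -LsSf https://astral.sh/uv/install.sh | sh
make dev                       # uv sync with all extras plus the dev dependency groups
make shell                     # activate .venv, or simply prefix commands with `uv run`
uv run pytest tests/common     # example: run a test suite inside the uv-managed environment
uv version --bump patch        # bump the patch version before a regular release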

Makefile

@@ -1,7 +1,7 @@
.PHONY: install-poetry build-library-prerelease has-poetry dev lint test test-common reset-test-storage recreate-compiled-deps build-library-prerelease publish-library
.PHONY: install-uv build-library-prerelease has-uv dev lint test test-common reset-test-storage recreate-compiled-deps build-library-prerelease publish-library
PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)")
.SILENT:has-poetry
.SILENT:has-uv
# read version from package
# AUTV=$(shell cd dlt && python3 -c "from __version__ import __version__;print(__version__)")
@@ -15,8 +15,8 @@ PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_i
help:
@echo "make"
@echo " install-poetry"
@echo " installs newest poetry version"
@echo " install-uv"
@echo " installs newest uv version"
@echo " dev"
@echo " prepares development env"
@echo " lint"
@@ -34,60 +34,66 @@ help:
@echo " publish-library"
@echo " builds library and then publishes it to pypi"
install-poetry:
install-uv:
ifneq ($(VIRTUAL_ENV),)
$(error you cannot be under virtual environment $(VIRTUAL_ENV))
endif
curl -sSL https://install.python-poetry.org | python3 -
curl -LsSf https://astral.sh/uv/install.sh | sh
has-poetry:
poetry --version
has-uv:
uv --version
dev: has-poetry
poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk,ibis,adbc
dev: has-uv
uv sync --all-extras --group docs --group dev --group providers --group pipeline --group sources --group sentry-sdk --group ibis --group adbc --group marimo
shell:
source .venv/bin/activate
dev-airflow: has-poetry
poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk,ibis,airflow
dev-airflow: has-uv
uv sync --all-extras --group docs --group providers --group pipeline --group sources --group sentry-sdk --group ibis --group airflow
lint:
poetry run python ./tools/check-lockfile.py
poetry run mypy --config-file mypy.ini dlt tests
uv run mypy --config-file mypy.ini dlt tests
# NOTE: we need to make sure docstring_parser_fork is the only version of docstring_parser installed
uv pip uninstall docstring_parser
uv pip install docstring_parser_fork --reinstall
# NOTE: we exclude all D lint errors (docstrings)
poetry run flake8 --extend-ignore=D --max-line-length=200 dlt
poetry run flake8 --extend-ignore=D --max-line-length=200 tests --exclude tests/reflection/module_cases,tests/common/reflection/cases/modules/
poetry run black dlt docs tests --check --diff --color --extend-exclude=".*syntax_error.py"
# poetry run isort ./ --diff
uv run flake8 --extend-ignore=D --max-line-length=200 dlt
uv run flake8 --extend-ignore=D --max-line-length=200 tests --exclude tests/reflection/module_cases,tests/common/reflection/cases/modules/
uv run black dlt docs tests --check --diff --color --extend-exclude=".*syntax_error.py"
# uv run isort ./ --diff
$(MAKE) lint-security
$(MAKE) lint-docstrings
format:
poetry run black dlt docs tests --extend-exclude='.*syntax_error.py|_storage/.*'
uv run black dlt docs tests --extend-exclude='.*syntax_error.py|_storage/.*'
lint-snippets:
cd docs/tools && poetry run python check_embedded_snippets.py full
cd docs/tools && uv run python check_embedded_snippets.py full
lint-and-test-snippets: lint-snippets
poetry run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
poetry run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo
cd docs/website/docs && poetry run pytest --ignore=node_modules
uv pip install docstring_parser_fork --reinstall
uv run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
uv run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo
cd docs/website/docs && uv run pytest --ignore=node_modules
lint-and-test-examples:
cd docs/tools && poetry run python prepare_examples_tests.py
poetry run flake8 --max-line-length=200 docs/examples
poetry run mypy --config-file mypy.ini docs/examples
cd docs/examples && poetry run pytest
uv pip install docstring_parser_fork --reinstall
cd docs/tools && uv run python prepare_examples_tests.py
uv run flake8 --max-line-length=200 docs/examples
uv run mypy --config-file mypy.ini docs/examples
cd docs/examples && uv run pytest
test-examples:
cd docs/examples && poetry run pytest
cd docs/examples && uv run pytest
lint-security:
# go for ll by cleaning up eval and SQL warnings.
poetry run bandit -r dlt/ -n 3 -lll
uv run bandit -r dlt/ -n 3 -lll
# check docstrings for all important public classes and functions
lint-docstrings:
poetry run flake8 --count \
uv run flake8 --count \
dlt/common/pipeline.py \
dlt/extract/decorators.py \
dlt/destinations/decorators.py \
@@ -100,16 +106,16 @@ lint-docstrings:
tests/pipeline/utils.py
test:
poetry run pytest tests
uv run pytest tests
test-load-local:
ACTIVE_DESTINATIONS='["duckdb", "filesystem"]' ALL_FILESYSTEM_DRIVERS='["memory", "file"]' poetry run pytest tests/load
ACTIVE_DESTINATIONS='["duckdb", "filesystem"]' ALL_FILESYSTEM_DRIVERS='["memory", "file"]' uv run pytest tests/load
test-load-local-postgres:
DESTINATION__POSTGRES__CREDENTIALS=postgresql://loader:loader@localhost:5432/dlt_data ACTIVE_DESTINATIONS='["postgres"]' ALL_FILESYSTEM_DRIVERS='["memory"]' poetry run pytest tests/load
DESTINATION__POSTGRES__CREDENTIALS=postgresql://loader:loader@localhost:5432/dlt_data ACTIVE_DESTINATIONS='["postgres"]' ALL_FILESYSTEM_DRIVERS='["memory"]' uv run pytest tests/load
test-common:
poetry run pytest tests/common tests/normalize tests/extract tests/pipeline tests/reflection tests/sources tests/cli/common tests/load/test_dummy_client.py tests/libs tests/destinations tests/transformations
uv run pytest tests/common tests/normalize tests/extract tests/pipeline tests/reflection tests/sources tests/cli/common tests/load/test_dummy_client.py tests/libs tests/destinations tests/transformations
reset-test-storage:
-rm -r _storage
@@ -117,22 +123,18 @@ reset-test-storage:
python3 tests/tools/create_storages.py
build-library: dev
poetry version
poetry build
publish-library: build-library
poetry publish
uv version
uv build
test-build-images: build-library
# NOTE: poetry export does not work with our many different deps, we install a subset and freeze
# poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
poetry install --no-interaction -E gcp -E redshift -E duckdb
poetry run pip freeze > _gen_requirements.txt
# NOTE: uv export does not work with our many different deps, we install a subset and freeze
uv sync --extra gcp --extra redshift --extra duckdb
uv pip freeze > _gen_requirements.txt
# filter out libs that need native compilation
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
# enable when we upgrade arrow to 20.x
# docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell poetry version -s)" .
docker build -f deploy/dlt/Dockerfile.airflow --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell uv version --short)" .
# enable when we upgrade arrow to 20.x
# docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA="$(shell git log -1 --pretty=%h)" --build-arg=IMAGE_VERSION="$(shell uv version)" .
preprocess-docs:
# run docs preprocessing to run a few checks and ensure examples can be parsed
@@ -147,16 +149,16 @@ start-test-containers:
docker compose -f "tests/load/clickhouse/docker-compose.yml" up -d
update-cli-docs:
poetry run dlt --debug render-docs docs/website/docs/reference/command-line-interface.md
uv run dlt --debug render-docs docs/website/docs/reference/command-line-interface.md
check-cli-docs:
poetry run dlt --debug render-docs docs/website/docs/reference/command-line-interface.md --compare
uv run dlt --debug render-docs docs/website/docs/reference/command-line-interface.md --compare
test-e2e-studio:
poetry run pytest --browser chromium tests/e2e
uv run pytest --browser chromium tests/e2e
test-e2e-studio-headed:
poetry run pytest --headed --browser chromium tests/e2e
uv run pytest --headed --browser chromium tests/e2e
start-dlt-studio-e2e:
poetry run marimo run --headless dlt/helpers/studio/app.py -- -- --pipelines_dir _storage/.dlt/pipelines --with_test_identifiers true
uv run marimo run --headless dlt/helpers/studio/app.py -- -- --pipelines_dir _storage/.dlt/pipelines --with_test_identifiers true
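For reference, the image-build flow above now derives everything from uv; a condensed sketch of the equivalent commands (same compiled_packages.txt filter as in the Makefile):

uv sync --extra gcp --extra redshift --extra duckdb    # install the subset shipped in the image
uv pip freeze > _gen_requirements.txt                  # uv export does not cover all deps, so freeze instead
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
docker build -f deploy/dlt/Dockerfile.airflow \
  --build-arg=COMMIT_SHA="$(git log -1 --pretty=%h)" \
  --build-arg=IMAGE_VERSION="$(uv version --short)" .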


@@ -80,7 +80,7 @@ class BaseDocProvider(ConfigProvider):
key: str,
value: Any,
pipeline_name: Optional[str],
*sections: str
*sections: str,
) -> None:
if pipeline_name:
sections = (pipeline_name,) + sections
@@ -109,7 +109,7 @@ class BaseDocProvider(ConfigProvider):
key: Optional[str],
value_or_fragment: str,
pipeline_name: str,
*sections: str
*sections: str,
) -> Any:
"""Tries to interpret `value_or_fragment` as a fragment of toml, yaml or json string and replace/merge into config doc.


@@ -188,6 +188,9 @@ class Venv:
"--prerelease",
"if-necessary-or-explicit",
]
# on windows we need to copy the dependencies to the venv instead of linking
if os.name == "nt":
cmd.extend(["--link-mode", "copy"])
else:
cmd = [context.env_exe, "-Im", pip_tool, "install"]
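The net effect is that on Windows the dlt-managed venv installs packages by copying instead of hard-linking, roughly equivalent to running the following (the package argument is a placeholder for whatever dlt installs into the venv):

uv pip install --prerelease if-necessary-or-explicit --link-mode copy <package>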


@@ -51,7 +51,7 @@ class FSClientBase(ABC):
errors: Any = None,
newline: Any = None,
compression: str = None,
**kwargs: Any
**kwargs: Any,
) -> str:
"""reads given file into string, tries gzip and pure text"""
if compression is None:


@@ -287,6 +287,12 @@ class FilesystemClient(
self.bucket_path = (
config.make_local_path(config.bucket_url) if self.is_local_filesystem else fs_path
)
# NOTE: we need to make checksum validation optional for boto to work with s3 compat mode
# https://www.beginswithdata.com/2025/05/14/aws-s3-tools-with-gcs/
os.environ["AWS_REQUEST_CHECKSUM_CALCULATION"] = "when_required"
os.environ["AWS_RESPONSE_CHECKSUM_VALIDATION"] = "when_required"
# pick local filesystem pathlib or posix for buckets
self.pathlib = os.path if self.is_local_filesystem else posixpath
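The same workaround can be applied outside dlt by exporting the variables before boto3/s3fs touches a GCS bucket in S3-compatibility mode (a sketch based on the note above):

export AWS_REQUEST_CHECKSUM_CALCULATION=when_required
export AWS_RESPONSE_CHECKSUM_VALIDATION=when_required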


@@ -67,9 +67,10 @@ class PostgresParquetCopyJob(RunnableLoadJob, HasFollowupJobs):
for table in pq_stream_with_new_columns(file_path, ()):
yield from table.to_batches()
with adbapi.connect(
self._config.credentials.to_native_representation()
) as conn, conn.cursor() as cur:
with (
adbapi.connect(self._config.credentials.to_native_representation()) as conn,
conn.cursor() as cur,
):
rows = cur.adbc_ingest(
self.load_table_name,
_iter_batches(self._file_path),


@@ -117,7 +117,7 @@ class QdrantClientConfiguration(WithLocalFiles, DestinationClientDwhConfiguratio
location=self.qd_location,
path=self.qd_path,
api_key=self.credentials.api_key,
**options
**options,
)
client.set_model(model)
return client


@@ -429,15 +429,15 @@ class Extract(WithStepInfo[ExtractMetrics, ExtractInfo]):
load_id = self.extract_storage.create_load_package(
source.schema, reuse_exiting_package=True
)
with Container().injectable_context(
SourceSchemaInjectableContext(source.schema)
), Container().injectable_context(
SourceInjectableContext(source)
), Container().injectable_context(
LoadPackageStateInjectableContext(
load_id=load_id, storage=self.extract_storage.new_packages
)
) as load_package:
with (
Container().injectable_context(SourceSchemaInjectableContext(source.schema)),
Container().injectable_context(SourceInjectableContext(source)),
Container().injectable_context(
LoadPackageStateInjectableContext(
load_id=load_id, storage=self.extract_storage.new_packages
)
) as load_package,
):
# inject the config section with the current source name
with inject_section(
ConfigSectionContext(


@@ -55,9 +55,11 @@ def import_pipeline_script(
module_path: str, script_relative_path: str, ignore_missing_imports: bool = False
) -> ModuleType:
# patch entry points to pipeline, sources and resources to prevent pipeline from running
with patch.object(Pipeline, "__init__", patch__init__), patch.object(
DltSource, "__init__", patch__init__
), patch.object(ManagedPipeIterator, "__init__", patch__init__):
with (
patch.object(Pipeline, "__init__", patch__init__),
patch.object(DltSource, "__init__", patch__init__),
patch.object(ManagedPipeIterator, "__init__", patch__init__),
):
return import_script_module(
module_path, script_relative_path, ignore_missing_imports=ignore_missing_imports
)


@@ -78,7 +78,7 @@ def _read_csv_duckdb(
items: Iterator[FileItemDict],
chunk_size: Optional[int] = 5000,
use_pyarrow: bool = False,
**duckdb_kwargs: Any
**duckdb_kwargs: Any,
) -> Iterator[TDataItems]:
"""A resource to extract data from the given CSV files.


@@ -46,7 +46,7 @@ def _get_retry_response(retry_state: RetryCallState) -> Optional[Response]:
ex = retry_state.outcome.exception()
if ex:
if isinstance(ex, HTTPError):
return cast(Response, ex.response)
return ex.response
return None
result = retry_state.outcome.result()
return result if isinstance(result, Response) else None


@@ -280,7 +280,7 @@ def sql_table(
name=str(table),
write_disposition=write_disposition,
merge_key=merge_key,
**hints
**hints,
)(
engine,
table_obj if table_obj is not None else table, # Pass table name if reflection deferred


@@ -30,5 +30,5 @@ If you use any secrets for the code snippets, e.g. Zendesk requires credentials.
If your example requires any additional dependency, then you can add it
- To `pyproject.toml` in the `[tool.poetry.group.docs.dependencies]` section.
- Do not forget to update your `poetry.lock` file with `poetry lock --no-update` command and commit.
- To `pyproject.toml` in the `[dependency-groups]` section in the `docs` group.
- Do not forget to update your `uv.lock` file with `uv sync` command and commit.
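For example, adding a docs-only dependency and refreshing the lockfile might look like this (a sketch; `some-package` is a placeholder):

uv add --group docs some-package    # writes the entry into [dependency-groups] in pyproject.toml
uv sync                             # updates uv.lock; commit both files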


@@ -3,7 +3,7 @@ from typing import Any, Iterator
import dlt
from dlt.common.typing import StrAny, TDataItem, TDataItems
from dlt.common.time import timestamp_within
from dlt.extract.resource import DltResource
from dlt.extract import DltResource, DltSource
@dlt.source
@@ -14,7 +14,7 @@ def rasa(
initial_timestamp: float = None,
end_timestamp: float = None,
store_last_timestamp: bool = True,
) -> Any:
) -> DltSource:
"""Transforms the base resource provided in `data_from` into a rasa tracker store raw dataset where each event type get it's own table.
The resource is a stream resource and it generates tables dynamically from data. The source uses `rasa.schema.yaml` file to initialize the schema


@@ -105,7 +105,7 @@ def tap(
os.path.abspath(config_file_path),
"--catalog",
os.path.abspath(catalog_file_path),
*state_params
*state_params,
)
yield from get_source_from_stream(pipe_iterator, state)


@@ -20,13 +20,14 @@ We'll learn:
# NOTE: this line is only for dlt CI purposes, you may delete it if you are using this example
__source_name__ = "zendesk"
from typing import Optional, Dict, Any, Tuple
from typing import Optional, Dict, Any, Tuple, Iterable, List
import dlt
from dlt.common import pendulum
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers import requests
from dlt.extract import DltResource
@dlt.source(max_table_nesting=2)
@@ -34,19 +35,19 @@ def zendesk_support(
credentials: Dict[str, str] = dlt.secrets.value,
start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008
end_date: Optional[TAnyDateTime] = None,
):
) -> DltResource:
"""
Retrieves data from Zendesk Support for tickets events.
Args:
credentials: Zendesk credentials (default: dlt.secrets.value)
start_date: Start date for data extraction (default: 2000-01-01)
end_date: End date for data extraction (default: None).
credentials (Dict[str, str]): Zendesk credentials (default: dlt.secrets.value)
start_date (Optional[TAnyDateTime]): Start date for data extraction (default: 2000-01-01)
end_date (Optional[TAnyDateTime]): End date for data extraction (default: None).
If end time is not provided, the incremental loading will be
enabled, and after the initial run, only new data will be retrieved.
Returns:
DltResource.
DltResource: a resource with ticket events
"""
# Convert start_date and end_date to Pendulum datetime objects
start_date_obj = ensure_pendulum_datetime(start_date)
@@ -101,19 +102,19 @@ def get_pages(
auth: Tuple[str, str],
data_point_name: str,
params: Optional[Dict[str, Any]] = None,
):
) -> Iterable[List[Dict[str, Any]]]:
"""
Makes a request to a paginated endpoint and returns a generator of data items per page.
Args:
url: The base URL.
endpoint: The url to the endpoint, e.g. /api/v2/calls
auth: Credentials for authentication.
data_point_name: The key which data items are nested under in the response object (e.g. calls)
params: Optional dict of query params to include in the request.
url (str): The base URL.
endpoint (str): The url to the endpoint, e.g. /api/v2/calls
auth (Tuple[str, str]): Credentials for authentication.
data_point_name (str): The key which data items are nested under in the response object (e.g. calls)
params (Optional[Dict[str, Any]], optional): Optional dict of query params to include in the request.
Returns:
Generator of pages, each page is a list of dict data items.
Yields:
List[Dict[str, Any]]: Generator of pages, each page is a list of dict data items.
"""
# update the page size to enable cursor pagination
params = params or {}


@@ -39,7 +39,7 @@ def chess_com_source(username: str, months: List[Dict[str, str]]) -> Iterator[Dl
months (List[Dict[str, str]]): List of dictionaries containing 'year' and 'month' keys.
Yields:
dlt.Resource: Resource objects containing fetched game data.
DltResource: Resource objects containing fetched game data.
"""
for month in months:
year = month["year"]


@@ -28,7 +28,7 @@ pip install fastembed>=0.1.1
# NOTE: this line is only for dlt CI purposes, you may delete it if you are using this example
__source_name__ = "zendesk"
from typing import Optional, Dict, Any, Tuple
from typing import Optional, Dict, Any, Tuple, Iterable, List
import dlt
from dlt.common import pendulum
@@ -37,6 +37,7 @@ from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.adapters import qdrant_adapter
from qdrant_client import QdrantClient
from dlt.extract import DltResource
# function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
@@ -45,19 +46,19 @@ def zendesk_support(
credentials: Dict[str, str] = dlt.secrets.value,
start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008
end_date: Optional[TAnyDateTime] = None,
):
) -> DltResource:
"""
Retrieves data from Zendesk Support for tickets events.
Args:
credentials: Zendesk credentials (default: dlt.secrets.value)
start_date: Start date for data extraction (default: 2000-01-01)
end_date: End date for data extraction (default: None).
credentials (Dict[str, str]): Zendesk credentials (default: dlt.secrets.value)
start_date (Optional[TAnyDateTime]): Start date for data extraction (default: 2000-01-01)
end_date (Optional[TAnyDateTime]): End date for data extraction (default: None).
If end time is not provided, the incremental loading will be
enabled, and after the initial run, only new data will be retrieved.
Returns:
DltResource.
DltResource: a resource with ticket data
"""
# Convert start_date and end_date to Pendulum datetime objects
start_date_obj = ensure_pendulum_datetime(start_date)
@@ -123,19 +124,19 @@ def get_pages(
auth: Tuple[str, str],
data_point_name: str,
params: Optional[Dict[str, Any]] = None,
):
) -> Iterable[List[Dict[str, Any]]]:
"""
Makes a request to a paginated endpoint and returns a generator of data items per page.
Args:
url: The base URL.
endpoint: The url to the endpoint, e.g. /api/v2/calls
auth: Credentials for authentication.
data_point_name: The key which data items are nested under in the response object (e.g. calls)
params: Optional dict of query params to include in the request.
url (str): The base URL.
endpoint (str): The url to the endpoint, e.g. /api/v2/calls
auth (Tuple[str, str]): Credentials for authentication.
data_point_name (str): The key which data items are nested under in the response object (e.g. calls)
params (Optional[Dict[str, Any]], optional): Optional dict of query params to include in the request.
Returns:
Generator of pages, each page is a list of dict data items.
Yields:
List[Dict[str, Any]]: Generator of pages, each page is a list of dict data items.
"""
# update the page size to enable cursor pagination
params = params or {}


@@ -62,6 +62,8 @@ Verified source pipedrive was added to your project!
If the dlt dependency is already added, make sure you install the extra for bigquery to it
If you are using poetry you may issue the following command:
poetry add dlt -E bigquery
For uv, use:
uv add "dlt[bigquery]"
* Read https://dlthub.com/docs/walkthroughs/create-a-pipeline for more information
```


@@ -15,9 +15,9 @@
"clear-versions": "node tools/clear_versions.js",
"update-versions": "node tools/update_versions.js",
"preprocess-docs": "node tools/preprocess_docs.js",
"gen-api-ref": "PYTHONPATH=. poetry run pydoc-markdown && poetry run python clean_pydoc_sidebar.py",
"gen-api-ref": "PYTHONPATH=. uv run pydoc-markdown && uv run python clean_pydoc_sidebar.py",
"gen-api-ref-netlify": "PYTHONPATH=. pydoc-markdown && python clean_pydoc_sidebar.py",
"render-cli-docs": "PYTHONPATH=. poetry run dlt render-docs > docs/reference/command-line-interface-generated.md"
"render-cli-docs": "PYTHONPATH=. uv run dlt render-docs > docs/reference/command-line-interface-generated.md"
},
"dependencies": {
"@docusaurus/core": "^3.7.0",

poetry.lock generated

File diff suppressed because one or more lines are too long


@@ -1,15 +1,20 @@
[tool.poetry]
[project]
name = "dlt"
version = "1.12.0"
description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run."
authors = ["dltHub Inc. <services@dlthub.com>"]
maintainers = [ "Marcin Rudolf <marcin@dlthub.com>", "Adrian Brudaru <adrian@dlthub.com>", "Anton Burnashev <anton@dlthub.com>", "David Scharf <david@dlthub.com>" ]
authors = [{ name = "dltHub Inc.", email = "services@dlthub.com" }]
requires-python = ">=3.9.2, <3.14, !=3.9.7"
readme = "README.md"
license = "Apache-2.0"
homepage = "https://github.com/dlt-hub"
repository = "https://github.com/dlt-hub/dlt"
maintainers = [
{ name = "Marcin Rudolf", email = "marcin@dlthub.com" },
{ name = "Adrian Brudaru", email = "adrian@dlthub.com" },
{ name = "Anton Burnashev", email = "anton@dlthub.com" },
{ name = "David Scharf", email = "david@dlthub.com" },
]
keywords = ["etl"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Topic :: Software Development :: Libraries",
@@ -21,299 +26,315 @@ classifiers = [
"Programming Language :: Python :: 3.13",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX :: Linux",
"Operating System :: Microsoft :: Windows",]
keywords = [ "etl" ]
include = [ "LICENSE.txt", "README.md", "dlt/sources/pipeline_templates/.gitignore", "dlt/sources/pipeline_templates/.dlt/config.toml" ]
packages = [
{ include = "dlt" },
"Operating System :: Microsoft :: Windows",
]
dependencies = [
"requests>=2.26.0",
"pendulum>=2.1.2",
"simplejson>=3.17.5",
"PyYAML>=5.4.1",
"semver>=3.0.0",
"hexbytes>=0.2.2",
"tzdata>=2022.1",
"tomlkit>=0.11.3",
"pathvalidate>=2.5.2",
"typing-extensions>=4.8.0",
"click>=7.1",
"requirements-parser>=0.5.0",
"setuptools>=65.6.0",
"humanize>=4.4.0",
"gitpython>=3.1.29",
"pytz>=2022.6",
"giturlparse>=0.10.0",
"orjson>=3.6.7,<4,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1 ; platform_python_implementation != 'PyPy'",
"tenacity>=8.0.2",
"jsonpath-ng>=1.5.3",
"fsspec>=2022.4.0",
"packaging>=21.1",
"pluggy>=1.3.0",
"win-precise-time>=1.4.2 ; os_name == 'nt' and python_version < '3.13'",
"sqlglot>=23.0.0",
"pywin32>=306 ; sys_platform == 'win32'",
"rich-argparse>=1.6.0",
]
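These PEP 508 requirement strings combine version ranges, exclusions (the orjson entry skips releases flagged for segfault bugs, as the comment retained from the Poetry section below notes) and environment markers. A small sketch of how such a specifier evaluates, using the `packaging` library that is itself listed as a dependency above:

```python
from packaging.specifiers import SpecifierSet

orjson_spec = SpecifierSet(">=3.6.7,<4,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1")
print("3.9.12" in orjson_spec)  # False: explicitly excluded release
print("3.10.2" in orjson_spec)  # True: inside the range, not excluded
```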
[tool.poetry.dependencies]
python = ">=3.9.2, <3.14, !=3.9.7"
requests = ">=2.26.0"
pendulum = ">=2.1.2"
simplejson = ">=3.17.5"
PyYAML = ">=5.4.1"
semver = ">=3.0.0"
hexbytes = ">=0.2.2"
tzdata = ">=2022.1"
tomlkit = ">=0.11.3"
pathvalidate = ">=2.5.2"
typing-extensions = ">=4.8.0"
click = ">=7.1"
requirements-parser = ">=0.5.0"
setuptools = ">=65.6.0"
humanize = ">=4.4.0"
gitpython = ">=3.1.29"
pytz = ">=2022.6"
giturlparse = ">=0.10.0"
# exclude some versions because of segfault bugs in orjson
orjson = {version = ">=3.6.7,<4,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1", markers="platform_python_implementation != 'PyPy'"}
tenacity = ">=8.0.2"
jsonpath-ng = ">=1.5.3"
fsspec = ">=2022.4.0"
packaging = ">=21.1"
pluggy = ">=1.3.0"
win-precise-time = {version = ">=1.4.2", markers="os_name == 'nt' and python_version < '3.13'"}
sqlglot = ">=23.0.0"
rich-argparse = ">=1.6.0"
psycopg2-binary = {version = ">=2.9.1", optional = true}
grpcio = {version = ">=1.50.0", optional = true}
google-cloud-bigquery = {version = ">=2.26.0", optional = true}
pyarrow = [
{version = ">=14.0.0", markers = "python_version >= '3.9' and python_version < '3.13'", optional = true},
{version = ">=18.0.0", markers = "python_version >= '3.13'", optional = true}
[project.optional-dependencies]
gcp = [
"grpcio>=1.50.0",
"google-cloud-bigquery>=2.26.0",
"db-dtypes>=1.2.0",
"gcsfs>=2022.4.0",
]
duckdb = {version = ">=0.9", optional = true}
# keep per-python version dependency as a reference
# duckdb = [
# {version = ">=0.6.1,<0.10.0", python = ">=3.9,<3.12", optional = true},
# {version = ">=0.10.0,<0.11.0", python = ">=3.12", optional = true}
# ]
s3fs = {version = ">=2022.4.0", optional = true}
gcsfs = {version = ">=2022.4.0", optional = true}
botocore = {version = ">=1.28", optional = true}
snowflake-connector-python = {version = ">=3.5.0", optional = true}
cron-descriptor = {version = ">=1.2.32", optional = true}
pipdeptree = {version = ">=2.9.0,<2.10", optional = true}
# pip is used by pipdeptree but not listed in its dependencies
pip = {version = ">=23.0.0", optional = true}
pyathena = {version = ">=2.9.6", optional = true}
weaviate-client = {version = ">=3.22", optional = true}
adlfs = {version = ">=2024.7.0", optional = true}
pyodbc = {version = ">=4.0.39", optional = true}
qdrant-client = {version = ">=1.8", optional = true, extras = ["fastembed"]}
databricks-sql-connector = [
{version = ">=2.9.3,<4", optional = true, markers = "python_version <= '3.12'"},
{version = ">=3.6.0", optional = true, markers = "python_version >= '3.13'"},
bigquery = [
"grpcio>=1.50.0",
"google-cloud-bigquery>=2.26.0",
"pyarrow>=14.0.0; python_version < '3.13'",
"pyarrow>=18.0.0; python_version >= '3.13'",
"gcsfs>=2022.4.0",
"db-dtypes>=1.2.0",
]
postgres = [
"psycopg2-binary>=2.9.1"
]
redshift = [
"psycopg2-binary>=2.9.1"
]
parquet = [
"pyarrow>=14.0.0; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
duckdb = [
"duckdb>=0.9"
]
filesystem = [
"s3fs>=2022.4.0",
"botocore>=1.28",
]
s3 = [
"s3fs>=2022.4.0",
"botocore>=1.28",
]
gs = [
"gcsfs>=2022.4.0"
]
az = [
"adlfs>=2024.7.0"
]
sftp = [
"paramiko>=3.3.0"
]
snowflake = ["snowflake-connector-python>=3.5.0"]
motherduck = [
"duckdb>=0.9",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
cli = [
"pipdeptree>=2.9.0,<2.10",
"cron-descriptor>=1.2.32",
"pip>=23.0.0",
]
athena = [
"pyathena>=2.9.6",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
"s3fs>=2022.4.0",
"botocore>=1.28",
]
weaviate = [
"weaviate-client>=3.22"
]
mssql = [
"pyodbc>=4.0.39"
]
synapse = [
"pyodbc>=4.0.39",
"adlfs>=2024.7.0",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
qdrant = [
"qdrant-client[fastembed]>=1.8"
]
databricks = [
"databricks-sql-connector>=2.9.3,<4 ; python_version <= '3.12'",
"databricks-sql-connector>=3.6.0 ; python_version >= '3.13'",
"databricks-sdk>=0.38.0",
]
clickhouse = [
"clickhouse-driver>=0.2.7",
"clickhouse-connect>=0.7.7",
"s3fs>=2022.4.0",
"gcsfs>=2022.4.0",
"adlfs>=2024.7.0",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
dremio = [
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
lancedb = [
"lancedb>=0.8.2 ; python_version < '3.13'",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
"tantivy>= 0.22.0",
]
deltalake = [
"deltalake>=0.25.1",
"pyarrow>=14.0.0; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
]
sql_database = ["sqlalchemy>=1.4"]
sqlalchemy = [
"sqlalchemy>=1.4",
"alembic>1.10.0",
]
pyiceberg = [
"pyiceberg>=0.9.1",
"pyarrow>=14.0.0 ; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
"sqlalchemy>=1.4",
]
postgis = [
"psycopg2-binary>=2.9.1"
]
clickhouse-driver = { version = ">=0.2.7", optional = true }
clickhouse-connect = { version = ">=0.7.7", optional = true }
lancedb = { version = ">=0.8.2", optional = true, markers = "python_version < '3.13'", allow-prereleases = true }
tantivy = { version = ">= 0.22.0", optional = true }
deltalake = { version = ">=0.25.1", optional = true }
sqlalchemy = { version = ">=1.4", optional = true }
alembic = {version = ">1.10.0", optional = true}
paramiko = {version = ">=3.3.0", optional = true}
db-dtypes = { version = ">=1.2.0", optional = true }
# `sql-sqlite` extra leads to dependency conflict with `apache-airflow` because `apache-airflow`
# requires `sqlalchemy<2.0.0` while the extra requires `sqlalchemy>=2.0.18`
# https://github.com/apache/airflow/issues/28723
# pyiceberg = { version = ">=0.7.1", optional = true, extras = ["sql-sqlite"] }
# we will rely on manual installation of `sqlalchemy>=2.0.18` instead
pyiceberg = { version = ">=0.9.1", optional = true }
databricks-sdk = {version = ">=0.38.0", optional = true}
pywin32 = {version = ">=306", optional = true, platform = "win32"}
[tool.poetry.extras]
gcp = ["grpcio", "google-cloud-bigquery", "db-dtypes", "gcsfs"]
# bigquery is alias on gcp extras
bigquery = ["grpcio", "google-cloud-bigquery", "pyarrow", "gcsfs", "db-dtypes"]
postgres = ["psycopg2-binary"]
redshift = ["psycopg2-binary"]
parquet = ["pyarrow"]
duckdb = ["duckdb"]
filesystem = ["s3fs", "botocore"]
s3 = ["s3fs", "botocore"]
gs = ["gcsfs"]
az = ["adlfs"]
sftp = ["paramiko"]
snowflake = ["snowflake-connector-python"]
motherduck = ["duckdb", "pyarrow"]
cli = ["pipdeptree", "cron-descriptor", "pip"]
athena = ["pyathena", "pyarrow", "s3fs", "botocore"]
weaviate = ["weaviate-client"]
mssql = ["pyodbc"]
synapse = ["pyodbc", "adlfs", "pyarrow"]
qdrant = ["qdrant-client"]
databricks = ["databricks-sql-connector", "databricks-sdk"]
clickhouse = ["clickhouse-driver", "clickhouse-connect", "s3fs", "gcsfs", "adlfs", "pyarrow"]
dremio = ["pyarrow"]
lancedb = ["lancedb", "pyarrow", "tantivy"]
deltalake = ["deltalake", "pyarrow"]
sql_database = ["sqlalchemy"]
sqlalchemy = ["sqlalchemy", "alembic"]
pyiceberg = ["pyiceberg", "pyarrow", "sqlalchemy"]
postgis = ["psycopg2-binary"]
[project.urls]
Homepage = "https://github.com/dlt-hub"
Repository = "https://github.com/dlt-hub/dlt"
[tool.poetry.scripts]
[project.scripts]
dlt = "dlt.cli._dlt:_main"
[tool.poetry.group.dev.dependencies]
cffi = ">=1.16"
greenlet = ">=3.1"
regex = ">=2024.10"
sqlalchemy = "<2"
requests-mock = "^1.10.0"
types-click = "^7.1.8"
sqlfluff = "^2.3.2"
types-deprecated = "^1.2.9.2"
pytest-console-scripts = "^1.4.1"
pytest = "^7.0.0"
mypy = ">=1.11.0,<1.13.0"
flake8 = "^7.0.0"
bandit = "^1.7.0"
black = "^23.7.0"
isort = "^5.12.0"
flake8-bugbear = "^22.0.0"
pytest-order = ">=1.0.0"
pytest-cases = ">=3.6.9"
pytest-forked = ">=1.3.0"
types-PyYAML = ">=6.0.7"
types-cachetools = ">=4.2.9"
types-protobuf = ">=3.19.8"
types-simplejson = ">=3.17.0"
types-requests = ">=2.25.6"
types-python-dateutil = ">=2.8.15"
flake8-tidy-imports = ">=4.8.0"
[dependency-groups]
dev = [
"cffi>=1.16",
"greenlet>=3.1",
"regex>=2024.10",
"sqlalchemy<2",
"requests-mock>=1.10.0,<2",
"types-click>=7.1.8,<8",
"sqlfluff>=2.3.2,<3",
"types-deprecated>=1.2.9.2,<2",
"pytest-console-scripts>=1.4.1,<2",
"pytest>=7.0.0,<8",
"mypy>=1.11.0,<1.13.0",
"flake8>=7.0.0,<8",
"bandit>=1.7.0,<2",
"black>=23.7.0,<24",
"isort>=5.12.0,<6",
"flake8-bugbear>=22.0.0,<23",
"pytest-order>=1.0.0",
"pytest-cases>=3.6.9",
"pytest-forked>=1.3.0",
"types-PyYAML>=6.0.7",
"types-cachetools>=4.2.9",
"types-protobuf>=3.19.8",
"types-simplejson>=3.17.0",
"types-requests>=2.25.6",
"types-python-dateutil>=2.8.15",
"flake8-tidy-imports>=4.8.0",
"flake8-encodings",
"flake8-builtins>=1.5.3,<2",
"boto3-stubs>=1.28.28,<2",
"types-tqdm>=4.66.0.2,<5",
"types-psutil>=5.9.5.16,<6",
"types-psycopg2>=2.9.21.14,<3",
"cryptography>=41.0.7,<42",
"google-api-python-client>=1.7.11",
"pytest-asyncio>=0.23.5,<0.24",
"types-sqlalchemy>=1.4.53.38,<2",
"types-pytz>=2024.1.0.20240203",
"ruff>=0.3.2,<0.4",
"pyjwt>=2.8.0,<3",
"pytest-mock>=3.14.0,<4",
"types-regex>=2024.5.15.20240519,<2025",
"flake8-print>=5.0.0,<6",
"mimesis>=7.0.0,<8",
"shapely>=2.0.6",
"pip>=24.0.0",
# keep last duckdb version that works with azure and iceberg correctly
"duckdb<1.2.1",
"pydoclint>=0.6.5,<0.7",
# limit the pyarrow version not to test on too new one
"pyarrow>=14.0.0,<19.0.0",
]
sources = [
"connectorx>=0.3.3 ; python_version >= '3.9'",
"connectorx>=0.4.0,<0.4.2 ; python_version >= '3.10'",
"pymysql>=1.1.0,<2",
"openpyxl>=3,<4",
]
pipeline = [
"google-auth-oauthlib>=1.0.0,<2",
"tqdm>=4.65.0,<5",
"enlighten>=1.11.2,<2",
"alive-progress>=3.1.1,<4",
"pydantic>=2.10",
"numpy>=1.21,<2.0 ; python_version >= '3.9' and python_version < '3.12'",
"numpy>=1.26,<2.0 ; python_version >= '3.12' and python_version < '3.13'",
"numpy>=2.0.0 ; python_version >= '3.13'",
"pandas>2.1 ; python_version >= '3.12'",
"pandas<2.1 ; python_version < '3.12'",
]
airflow = ["apache-airflow>=2.8.0,<3 ; python_version < '3.12'"]
ibis = ["ibis-framework[duckdb, postgres, bigquery, snowflake, mssql, clickhouse, databricks]>=10.3.0 ; python_version >= '3.10'"]
streamlit = ["streamlit>=1.40.0,<2 ; python_version >= '3.9' and python_version != '3.9.7' and python_version < '3.14'"]
marimo = [
"marimo>=0.13.6",
"playwright>=1.52.0,<2",
"pytest-playwright>=0.7.0,<1",
"pyarrow>=14.0.0; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'"
]
providers = ["google-api-python-client>=2.86.0,<3"]
sentry-sdk = ["sentry-sdk>=2.0.0,<3"]
dbt = [
"dbt-core>=1.5.0",
"dbt-redshift>=1.5.0",
"dbt-bigquery>=1.5.0",
"dbt-duckdb>=1.5.0",
"dbt-snowflake>=1.5.0",
"dbt-athena-community>=1.5.0",
"dbt-sqlserver>=1.5.0",
]
adbc = [
"adbc-driver-postgresql>=1.6.0"
]
docs = [
"SQLAlchemy>=1.4.0",
"pymysql>=1.1.0,<2",
"pypdf2>=3.0.1,<4",
"pydoc-markdown>=4.8.2,<5",
"dbt-core>=1.5.0",
"dbt-duckdb>=1.5.0",
"pymongo>=4.3.3",
"pandas>2",
"alive-progress>=3.0.1",
"pyarrow>=14.0.0; python_version < '3.13'",
"pyarrow>=18.0.0 ; python_version >= '3.13'",
"psycopg2-binary>=2.9",
"lancedb>=0.8.2 ; python_version < '3.13'",
"openai>=1.45",
"connectorx>=0.3.2,<0.4.2",
"modal>=0.64.170",
]
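The `[dependency-groups]` table (PEP 735) replaces Poetry's optional groups, so `uv sync --group docs` plays the role of `poetry install --with docs`. A small, illustrative way to inspect the groups declared above (assumes Python 3.11+ for `tomllib`; older interpreters would need the `tomli` backport):

```python
# Illustrative helper, not part of the repo: list the PEP 735 dependency groups
# declared in pyproject.toml and how many requirements each one pins.
import tomllib  # Python 3.11+; use the `tomli` backport on older versions

with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

for group, requirements in pyproject.get("dependency-groups", {}).items():
    print(f"{group}: {len(requirements)} requirements")
```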
[tool.uv]
[tool.uv.sources]
flake8-encodings = { git = "https://github.com/dlt-hub/flake8-encodings.git", branch = "disable_jedi_support" }
flake8-builtins = "^1.5.3"
boto3-stubs = "^1.28.28"
types-tqdm = "^4.66.0.2"
types-psutil = "^5.9.5.16"
types-psycopg2 = "^2.9.21.14"
cryptography = "^41.0.7"
google-api-python-client = ">=1.7.11"
pytest-asyncio = "^0.23.5"
types-sqlalchemy = "^1.4.53.38"
types-pytz = ">=2024.1.0.20240203"
ruff = "^0.3.2"
pyjwt = "^2.8.0"
pytest-mock = "^3.14.0"
types-regex = "^2024.5.15.20240519"
flake8-print = "^5.0.0"
mimesis = "^7.0.0"
shapely = ">=2.0.6"
pip = ">=24.0.0"
# keep last duckdb version that works with azure and iceberg correctly
duckdb = "<1.2.1"
# limit the pyarrow version not to test on too new one
pyarrow = "<19"
pydoclint = "^0.6.5"
[tool.poetry.group.sources]
optional = true
[tool.poetry.group.sources.dependencies]
connectorx = [
{version = ">=0.3.3", python = ">=3.9"},
{version = ">=0.4.0,<0.4.2", python = ">=3.10"}
]
pymysql = "^1.1.0"
openpyxl = "^3"
[tool.poetry.group.adbc]
optional = true
[tool.poetry.group.adbc.dependencies]
adbc-driver-postgresql = ">=1.6.0"
[tool.poetry.group.pipeline]
optional = true
[tool.poetry.group.pipeline.dependencies]
google-auth-oauthlib = "^1.0.0"
tqdm = "^4.65.0"
enlighten = "^1.11.2"
alive-progress = "^3.1.1"
pydantic = ">=2.10"
numpy = [
{ version = ">=1.21", python = ">=3.9,<3.12" },
{ version = ">=1.26", python = ">=3.12" },
{ version = ">=2.0.0", python = ">=3.13" }
]
pandas = [
{version = ">2.1", markers = "python_version >= '3.12'"},
{version = "<2.1", markers = "python_version < '3.12'"}
[tool.hatch.build.targets.sdist]
packages = ["dlt"]
include = [
"LICENSE.txt",
"README.md",
"dlt/sources/pipeline_templates/.gitignore",
"dlt/sources/pipeline_templates/.dlt/config.toml",
]
# dependencies to run and test airflow
[tool.poetry.group.airflow]
optional = true
[tool.poetry.group.airflow.dependencies]
apache-airflow = {version = "^2.8.0", markers = "python_version < '3.12'"}
# dependencies to run and test things that require ibis
[tool.poetry.group.ibis]
optional = true
[tool.poetry.group.ibis.dependencies]
ibis-framework = { version = ">=10.3.0", markers = "python_version >= '3.10'", extras = ["duckdb", "postgres", "bigquery", "snowflake", "mssql", "clickhouse", "databricks"]}
# dependencies to run and test the streamlit app (streamlit does not work with python 3.9.7)
[tool.poetry.group.streamlit]
optional = true
[tool.poetry.group.streamlit.dependencies]
streamlit = {version = "^1.40.0", markers = "python_version >= '3.9' and python_version != '3.9.7' and python_version < '3.14'"}
# dependencies to run and test the marimo app
[tool.poetry.group.marimo]
optional = true
[tool.poetry.group.marimo.dependencies]
marimo = "^0.13.6"
playwright = "^1.52.0"
pytest-playwright = "^0.7.0"
# TODO: make marimo app independent of pyarrow
pandas = [
{version = ">2.1", markers = "python_version >= '3.12'"},
{version = "<2.1", markers = "python_version < '3.12'"}
[tool.hatch.build.targets.wheel]
packages = ["dlt"]
include = [
"LICENSE.txt",
"README.md",
"dlt/sources/pipeline_templates/.gitignore",
"dlt/sources/pipeline_templates/.dlt/config.toml",
]
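The Hatch wheel target lists the two non-Python template files explicitly so they ship inside the built package. A hedged sanity check, assuming `dlt.sources.pipeline_templates` is an importable package as the include paths suggest (this snippet is illustrative and not part of the test suite):

```python
# Confirms the template data files listed above are packaged with dlt.
from importlib.resources import files

templates = files("dlt.sources.pipeline_templates")  # assumed importable package
print((templates / ".gitignore").is_file())
print((templates / ".dlt" / "config.toml").is_file())
```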
[tool.poetry.group.providers]
optional = true
[tool.poetry.group.providers.dependencies]
google-api-python-client = "^2.86.0"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.poetry.group.sentry-sdk]
optional = true
[tool.poetry.group.sentry-sdk.dependencies]
sentry-sdk = "^2.0.0"
[tool.poetry.group.dbt]
optional = true
[tool.poetry.group.dbt.dependencies]
dbt-core = ">=1.5.0"
dbt-redshift = ">=1.5.0"
dbt-bigquery = ">=1.5.0"
dbt-duckdb = ">=1.5.0"
dbt-snowflake = ">=1.5.0"
dbt-athena-community = ">=1.5.0"
dbt-sqlserver = ">=1.5.0"
# dbt-databricks = {version = ">=1.7.3", optional = true}
[tool.poetry.group.docs]
optional = true
[tool.poetry.group.docs.dependencies]
SQLAlchemy = ">=1.4.0"
pymysql = "^1.1.0"
pypdf2 = "^3.0.1"
pydoc-markdown = "^4.8.2"
dbt-core = ">=1.5.0"
dbt-duckdb = ">=1.5.0"
pymongo = ">=4.3.3"
pandas = ">2"
alive-progress = ">=3.0.1"
pyarrow = [
{version = ">=17.0.0", markers = "python_version < '3.13'"},
{version = ">=18.0.0", markers = "python_version >= '3.13'"}
]
psycopg2-binary = ">=2.9"
lancedb = [
{ version = ">=0.8.2", markers = "python_version < '3.13'", allow-prereleases = true }
]
openai = ">=1.45"
connectorx = { version = ">=0.3.2,<0.4.2" }
modal = ">=0.64.170"
[tool.black] # https://black.readthedocs.io/en/stable/usage_and_configuration/the_basics.html#configuration-via-a-file
[tool.black]
line-length = 100
preview = true
[tool.isort] # https://pycqa.github.io/isort/docs/configuration/options.html
[tool.isort]
color_output = true
line_length = 100
profile = "black"
src_paths = ["dlt"]
multi_line_output = 3
[build-system]
requires = ["poetry-core>=1.0.8"]
build-backend = "poetry.core.masonry.api"

View File

@@ -31,9 +31,12 @@ def test_main_telemetry_command(test_storage: FileStorage) -> None:
def _initial_providers(self):
return [ConfigTomlProvider(run_context.settings_dir, global_dir=run_context.global_dir)]
with set_working_dir(test_storage.make_full_path("project")), patch(
"dlt.common.runtime.run_context.RunContext.initial_providers",
_initial_providers,
with (
set_working_dir(test_storage.make_full_path("project")),
patch(
"dlt.common.runtime.run_context.RunContext.initial_providers",
_initial_providers,
),
):
# no config files: status is ON
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
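The reformatting above, repeated in several test files below, wraps multiple context managers in parentheses instead of one long `with` header; the syntax became official in Python 3.10, and black emits it here given the `preview = true` setting further down in pyproject.toml. A minimal, self-contained illustration:

```python
import contextlib
import io

# Parenthesized context managers let long `with` headers wrap cleanly,
# without backslash continuations or nested with-blocks.
with (
    contextlib.redirect_stdout(io.StringIO()) as out,
    contextlib.redirect_stderr(io.StringIO()) as err,
):
    print("captured")

assert out.getvalue() == "captured\n"
assert err.getvalue() == ""
```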

View File

@@ -44,7 +44,7 @@ def test_deploy_command_no_repo(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
# test wrapper
@@ -53,7 +53,7 @@ def test_deploy_command_no_repo(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert ex._excinfo[1].error_code == -4
@@ -79,7 +79,7 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert "Your current repository has no origin set" in py_ex.value.args[0]
with pytest.raises(CliCommandInnerException):
@@ -87,7 +87,7 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
# we have a repo that was never run
@@ -97,14 +97,14 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
with pytest.raises(CliCommandException) as ex:
_dlt.deploy_command_wrapper(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert ex._excinfo[1].error_code == -3
@@ -120,7 +120,7 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert "The last pipeline run ended with error" in py_ex2.value.args[0]
with pytest.raises(CliCommandException) as ex:
@@ -128,7 +128,7 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert ex._excinfo[1].error_code == -3
@@ -151,7 +151,7 @@ def test_deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
_out = buf.getvalue()
print(_out)
@@ -172,7 +172,7 @@ def test_deploy_command(
"no_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
with echo.always_choose(False, always_choose_value=True):
with pytest.raises(CliCommandException) as ex:
@@ -180,6 +180,6 @@ def test_deploy_command(
"no_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
**deployment_args,
)
assert ex._excinfo[1].error_code == -5

View File

@@ -160,8 +160,9 @@ def pytest_configure(config):
]:
logging.getLogger(log).setLevel("ERROR")
with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(
io.StringIO()
with (
contextlib.redirect_stdout(io.StringIO()),
contextlib.redirect_stderr(io.StringIO()),
):
db.resetdb()

View File

@@ -146,14 +146,17 @@ def test_successful_load(write_disposition: str, layout: str, default_buckets_en
dataset_name = "test_" + uniq_id()
timestamp = ensure_pendulum_datetime("2024-04-05T09:16:59.942779Z")
mocked_timestamp = {"state": {"created_at": timestamp}}
with mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
), perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition=write_disposition,
) as load_info:
with (
mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
),
perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition=write_disposition,
) as load_info,
):
client, jobs, _, load_id = load_info
layout = client.config.layout
dataset_path = posixpath.join(client.bucket_path, client.config.dataset_name)
@@ -194,14 +197,17 @@ def test_replace_write_disposition(layout: str, default_buckets_env: str) -> Non
# state is typed now
timestamp = ensure_pendulum_datetime("2024-04-05T09:16:59.942779Z")
mocked_timestamp = {"state": {"created_at": timestamp}}
with mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
), perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition="replace",
) as load_info:
with (
mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
),
perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition="replace",
) as load_info,
):
client, _, root_path, load_id1 = load_info
layout = client.config.layout
# this path will be kept after replace
@@ -270,14 +276,17 @@ def test_append_write_disposition(layout: str, default_buckets_env: str) -> None
# also we would like to have reliable timestamp for this test so we patch it
timestamp = ensure_pendulum_datetime("2024-04-05T09:16:59.942779Z")
mocked_timestamp = {"state": {"created_at": timestamp}}
with mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
), perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition="append",
) as load_info:
with (
mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
),
perform_load(
dataset_name,
NORMALIZED_FILES,
write_disposition="append",
) as load_info,
):
client, jobs1, root_path, load_id1 = load_info
with perform_load(dataset_name, NORMALIZED_FILES, write_disposition="append") as load_info:
client, jobs2, root_path, load_id2 = load_info

View File

@@ -26,9 +26,10 @@ def get_key_path(user: str = "foo") -> str:
def files_are_equal(file1_path, file2_path):
try:
with open(file1_path, "r", encoding="utf-8") as f1, open(
file2_path, "r", encoding="utf-8"
) as f2:
with (
open(file1_path, "r", encoding="utf-8") as f1,
open(file2_path, "r", encoding="utf-8") as f2,
):
return f1.read() == f2.read()
except FileNotFoundError:
return False

View File

@@ -979,13 +979,16 @@ def assert_complete_job(
# will complete all jobs
timestamp = "2024-04-05T09:16:59.942779Z"
mocked_timestamp = {"state": {"created_at": timestamp}}
with mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
), patch.object(
dummy_impl.DummyClient,
"complete_load",
) as complete_load:
with (
mock.patch(
"dlt.current.load_package",
return_value=mocked_timestamp,
),
patch.object(
dummy_impl.DummyClient,
"complete_load",
) as complete_load,
):
with ThreadPoolExecutor() as pool:
load.run(pool)

View File

@@ -505,8 +505,9 @@ def test_load_none_trace() -> None:
def test_trace_telemetry(temporary_telemetry: RuntimeConfiguration) -> None:
with patch("dlt.common.runtime.sentry.before_send", _mock_sentry_before_send), patch(
"dlt.common.runtime.anon_tracker.before_send", _mock_anon_tracker_before_send
with (
patch("dlt.common.runtime.sentry.before_send", _mock_sentry_before_send),
patch("dlt.common.runtime.anon_tracker.before_send", _mock_anon_tracker_before_send),
):
ANON_TRACKER_SENT_ITEMS.clear()
SENTRY_SENT_ITEMS.clear()

View File

@@ -346,9 +346,12 @@ def setup_secret_providers_to_current_module(request):
ConfigTomlProvider(settings_dir=config_dir),
]
with set_working_dir(dname), patch(
"dlt.common.runtime.run_context.RunContext.initial_providers",
_initial_providers,
with (
set_working_dir(dname),
patch(
"dlt.common.runtime.run_context.RunContext.initial_providers",
_initial_providers,
),
):
Container()[PluggableRunContext].reload_providers()

View File

@@ -1,24 +0,0 @@
import sys

# File and string to search for
lockfile_name = "poetry.lock"
hash_string = "hash = "
threshold = 100

try:
    count = 0
    with open(lockfile_name, 'r', encoding="utf8") as file:
        for line in file:
            if hash_string in line:
                count += 1
                if count >= threshold:
                    print(f"Success: Found '{hash_string}' more than {threshold} times in {lockfile_name}.")
                    sys.exit(0)
    # If the loop completes without early exit, it means the threshold was not reached
    print(f"Error: The string '{hash_string}' appears less than {threshold} times in {lockfile_name}, please make sure you are using an up to date poetry version.")
    sys.exit(1)
except FileNotFoundError:
    print(f"Error: File {lockfile_name} does not exist.")
    sys.exit(1)

uv.lock: 9574 changed lines (generated, new file)

File diff suppressed because it is too large