From b062dcafa4ce2522fd46e0be3fe16043253a688d Mon Sep 17 00:00:00 2001 From: rudolfix Date: Sun, 21 Sep 2025 00:15:08 +0200 Subject: [PATCH] docs/removes dlt plus docs and adds eula (#3079) * answers defaults in cli if tty disconnected * adds method to send anon tracker event even if disabled * fixes types in source/resource build in generator * adds dlt.hub with transformation decorator * moves dlt-plus to separate sidebar in docs, renames to dltHub Features, adds EULA * renamed plus to hub in docs * fixes docs logos * removes more dlt+ * renames plus tests * fixes ci run main * fixes hub workflows --- .github/workflows/main.yml | 6 +- .github/workflows/test_docs_snippets.yml | 10 +- .github/workflows/test_examples.yml | 6 +- .../workflows/{test_plus.yml => test_hub.yml} | 12 +- LICENSE.txt | 2 +- Makefile | 4 +- dlt/__init__.py | 3 +- dlt/cli/_dlt.py | 6 +- dlt/cli/echo.py | 10 +- dlt/cli/telemetry_command.py | 1 - dlt/common/runtime/anon_tracker.py | 34 ++- dlt/common/runtime/telemetry.py | 6 +- dlt/common/utils.py | 11 +- dlt/extract/resource.py | 3 +- dlt/extract/source.py | 6 +- dlt/hub.py | 8 + .../dlt-ecosystem/destinations/databricks.md | 2 +- .../dlt-ecosystem/destinations/iceberg.md | 4 +- .../dlt-ecosystem/transformations/dbt/dbt.md | 4 - .../transformations/dbt/dbt_cloud.md | 4 - .../verified-sources/sql_database/index.md | 4 +- .../docs/general-usage/credentials/setup.md | 4 - docs/website/docs/hub/EULA.md | 187 +++++++++++++++++ .../docs/{plus => hub}/core-concepts/cache.md | 0 .../{plus => hub}/core-concepts/datasets.md | 0 .../{plus => hub}/core-concepts/profiles.md | 0 .../{plus => hub}/core-concepts/project.md | 0 .../docs/{plus => hub}/ecosystem/delta.md | 0 .../docs/{plus => hub}/ecosystem/iceberg.md | 0 .../docs/{plus => hub}/ecosystem/ms-sql.md | 2 +- .../{plus => hub}/ecosystem/snowflake_plus.md | 0 .../website/docs/{plus => hub}/features/ai.md | 0 .../{plus => hub}/features/data-access.md | 0 .../features}/mcp-server.md | 6 +- .../{plus => hub}/features/project/index.md | 0 .../features/project/overview.md | 0 .../features/project/python-api.md | 0 .../features/project/source-configuration.md | 0 .../docs/{plus => hub}/features/projects.md | 0 .../features/quality/data-quality.md | 0 .../{plus => hub}/features/quality/tests.md | 0 .../transformations/dbt-transformations.md | 0 .../features}/transformations/index.md | 20 +- .../features/transformations/setup.md | 2 - .../transformation-snippets.py | 32 +-- .../getting-started/advanced_tutorial.md | 0 .../getting-started/installation.md | 0 .../{plus => hub}/getting-started/tutorial.md | 0 docs/website/docs/hub/intro.md | 44 ++++ .../{plus => hub}/production/observability.md | 0 .../production/pipeline-runner.md | 0 docs/website/docs/{plus => hub}/reference.md | 0 .../plus/features/transformations/index.md | 21 -- .../transformations/python-transformations.md | 49 ----- docs/website/docs/plus/intro.md | 27 --- docs/website/sidebars.js | 195 +++++++++--------- docs/website/src/css/custom.css | 88 ++++---- .../src/theme/DltHubFeatureAdmonition.js | 11 + .../src/theme/DocBreadcrumbs/index.tsx | 4 +- docs/website/src/theme/Heading/index.js | 8 +- .../website/src/theme/PlusAdmonition/index.js | 11 - docs/website/static/img/dlt+_logo.png | Bin 1461 -> 0 bytes docs/website/static/img/dlthub_logo.png | Bin 0 -> 2054 bytes tests/cli/common/test_cli_invoke.py | 12 +- tests/cli/test_deploy_command.py | 13 +- tests/common/runtime/test_telemetry.py | 11 + tests/{plus => hub}/__init__.py | 2 +- tests/{plus => hub}/test_cli.py | 0 
tests/{plus => hub}/test_destinations.py | 0 tests/{plus => hub}/test_sources.py | 0 tests/hub/test_transformations.py | 14 ++ 71 files changed, 557 insertions(+), 352 deletions(-) rename .github/workflows/{test_plus.yml => test_hub.yml} (92%) create mode 100644 dlt/hub.py create mode 100644 docs/website/docs/hub/EULA.md rename docs/website/docs/{plus => hub}/core-concepts/cache.md (100%) rename docs/website/docs/{plus => hub}/core-concepts/datasets.md (100%) rename docs/website/docs/{plus => hub}/core-concepts/profiles.md (100%) rename docs/website/docs/{plus => hub}/core-concepts/project.md (100%) rename docs/website/docs/{plus => hub}/ecosystem/delta.md (100%) rename docs/website/docs/{plus => hub}/ecosystem/iceberg.md (100%) rename docs/website/docs/{plus => hub}/ecosystem/ms-sql.md (99%) rename docs/website/docs/{plus => hub}/ecosystem/snowflake_plus.md (100%) rename docs/website/docs/{plus => hub}/features/ai.md (100%) rename docs/website/docs/{plus => hub}/features/data-access.md (100%) rename docs/website/docs/{dlt-ecosystem/llm-tooling => hub/features}/mcp-server.md (95%) rename docs/website/docs/{plus => hub}/features/project/index.md (100%) rename docs/website/docs/{plus => hub}/features/project/overview.md (100%) rename docs/website/docs/{plus => hub}/features/project/python-api.md (100%) rename docs/website/docs/{plus => hub}/features/project/source-configuration.md (100%) rename docs/website/docs/{plus => hub}/features/projects.md (100%) rename docs/website/docs/{plus => hub}/features/quality/data-quality.md (100%) rename docs/website/docs/{plus => hub}/features/quality/tests.md (100%) rename docs/website/docs/{plus => hub}/features/transformations/dbt-transformations.md (100%) rename docs/website/docs/{general-usage => hub/features}/transformations/index.md (94%) rename docs/website/docs/{plus => hub}/features/transformations/setup.md (95%) rename docs/website/docs/{general-usage => hub/features}/transformations/transformation-snippets.py (95%) rename docs/website/docs/{plus => hub}/getting-started/advanced_tutorial.md (100%) rename docs/website/docs/{plus => hub}/getting-started/installation.md (100%) rename docs/website/docs/{plus => hub}/getting-started/tutorial.md (100%) create mode 100644 docs/website/docs/hub/intro.md rename docs/website/docs/{plus => hub}/production/observability.md (100%) rename docs/website/docs/{plus => hub}/production/pipeline-runner.md (100%) rename docs/website/docs/{plus => hub}/reference.md (100%) delete mode 100644 docs/website/docs/plus/features/transformations/index.md delete mode 100644 docs/website/docs/plus/features/transformations/python-transformations.md delete mode 100644 docs/website/docs/plus/intro.md create mode 100644 docs/website/src/theme/DltHubFeatureAdmonition.js delete mode 100644 docs/website/src/theme/PlusAdmonition/index.js delete mode 100644 docs/website/static/img/dlt+_logo.png create mode 100644 docs/website/static/img/dlthub_logo.png rename tests/{plus => hub}/__init__.py (50%) rename tests/{plus => hub}/test_cli.py (100%) rename tests/{plus => hub}/test_destinations.py (100%) rename tests/{plus => hub}/test_sources.py (100%) create mode 100644 tests/hub/test_transformations.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d04b85ec..dc5426765 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -61,10 +61,10 @@ jobs: needs: test_common uses: ./.github/workflows/test_sources_local.yml - test_plus: - name: test dlt+ connection + test_hub: + name: test dlthub features 
needs: lint - uses: ./.github/workflows/test_plus.yml + uses: ./.github/workflows/test_hub.yml test_tools_airflow: name: test airflow helpers diff --git a/.github/workflows/test_docs_snippets.yml b/.github/workflows/test_docs_snippets.yml index ea258c52b..72ad35755 100644 --- a/.github/workflows/test_docs_snippets.yml +++ b/.github/workflows/test_docs_snippets.yml @@ -68,15 +68,15 @@ jobs: activate-environment: true enable-cache: true - - name: Install dlt-plus nightly devel build without cache - run: uv run pip install --upgrade --force-reinstall --no-cache-dir --pre dlt-plus + - name: Install dependencies + run: uv sync --extra duckdb --extra weaviate --extra parquet --extra qdrant --extra bigquery --extra postgres --extra lancedb --extra s3 --extra workspace --group docs --group sentry-sdk --group ibis --group providers + + - name: Install dlt-plus incl alpha releases + run: uv run pip install --pre dlt-plus - name: run docs preprocessor run: make preprocess-docs - - name: Install dependencies - run: uv sync --extra duckdb --extra weaviate --extra parquet --extra qdrant --extra bigquery --extra postgres --extra lancedb --extra s3 --extra workspace --group docs --group sentry-sdk --group ibis --group providers - - name: Create secrets.toml for snippets run: | cp tests/.dlt/dev.secrets.toml docs/website/docs/.dlt/secrets.toml diff --git a/.github/workflows/test_examples.yml b/.github/workflows/test_examples.yml index 55bddfe08..94807f7c5 100644 --- a/.github/workflows/test_examples.yml +++ b/.github/workflows/test_examples.yml @@ -44,12 +44,12 @@ jobs: activate-environment: true enable-cache: true - - name: Install dlt-plus nightly devel build without cache - run: uv run pip install --upgrade --force-reinstall --no-cache-dir --pre dlt-plus - - name: Install dependencies run: uv sync --extra duckdb --extra weaviate --extra parquet --extra qdrant --extra bigquery --extra postgres --extra lancedb --extra s3 --extra workspace --group docs --group sentry-sdk --group ibis --group providers + - name: Install dlt-plus incl alpha releases + run: uv run pip install --pre dlt-plus + - name: create secrets.toml for examples run: pwd && echo "$DLT_SECRETS_TOML" > docs/examples/.dlt/secrets.toml diff --git a/.github/workflows/test_plus.yml b/.github/workflows/test_hub.yml similarity index 92% rename from .github/workflows/test_plus.yml rename to .github/workflows/test_hub.yml index 934485902..68907f100 100644 --- a/.github/workflows/test_plus.yml +++ b/.github/workflows/test_hub.yml @@ -1,4 +1,4 @@ -name: plus | plus +name: hub | dlthub features # # dlt-plus smoke tests against the nightly build. 
@@ -12,7 +12,7 @@ env: RUNTIME__LOG_LEVEL: ERROR jobs: - run_plus: + run_hub_features: name: test strategy: fail-fast: false @@ -65,16 +65,16 @@ jobs: run: uv run pip install --upgrade --force-reinstall --no-cache-dir ${{ matrix.plus_dep }} - name: Run tests - run: pytest tests/plus + run: pytest tests/hub if: matrix.os == 'ubuntu-latest' - name: Run tests on mac on win without mssql driver - run: pytest tests/plus -m "not mssql" + run: pytest tests/hub -m "not mssql" if: matrix.os == 'macos-latest' || matrix.os == 'windows-latest' matrix_job_required_check: - name: plus | plus tests - needs: run_plus + name: hub | dlthub features tests + needs: run_hub_features runs-on: ubuntu-latest if: always() steps: diff --git a/LICENSE.txt b/LICENSE.txt index fa1b4ed2d..de4f65591 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -188,7 +188,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2022 ScaleVector + Copyright 2022-2025 ScaleVector Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/Makefile b/Makefile index 381f6fb06..fcb4caadc 100644 --- a/Makefile +++ b/Makefile @@ -74,8 +74,8 @@ lint-and-test-snippets: lint-snippets uv pip install docstring_parser_fork --reinstall uv run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed --exclude docs/website/versioned_docs/ --exclude docs/website/docs/general-usage/transformations/transformation-snippets.py uv run ruff check - uv run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo --exclude docs/website/docs/general-usage/transformations/transformation-snippets.py - cd docs/website/docs && uv run pytest --ignore=node_modules --ignore general-usage/transformations/transformation-snippets.py + uv run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo --exclude docs/website/docs/hub/features/transformations/transformation-snippets.py + cd docs/website/docs && uv run pytest --ignore=node_modules --ignore hub/features/transformations/transformation-snippets.py lint-and-test-examples: uv pip install docstring_parser_fork --reinstall diff --git a/dlt/__init__.py b/dlt/__init__.py index 9e887dc57..7ebc87350 100644 --- a/dlt/__init__.py +++ b/dlt/__init__.py @@ -5,7 +5,7 @@ How to create a data loading pipeline with dlt in 3 seconds: 1. Write a pipeline script >>> import dlt >>> from dlt.sources.helpers import requests ->>> dlt.run(requests.get("https://pokeapi.co/api/v2/pokemon/").json()["results"], destination="duckdb", table_name="pokemon") +>>> print(dlt.run(requests.get("https://pokeapi.co/api/v2/pokemon/").json()["results"], destination="duckdb", table_name="pokemon")) 2. 
Run your pipeline script > $ python pokemon.py @@ -42,6 +42,7 @@ from dlt.pipeline import ( ) from dlt.pipeline import progress from dlt import destinations +from dlt import hub as hub pipeline = _pipeline current = _current diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index b59852a2f..89e2d50c0 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -12,10 +12,10 @@ import dlt.cli.echo as fmt from dlt.cli.exceptions import CliCommandException from dlt.cli.command_wrappers import ( - deploy_command_wrapper, telemetry_change_status_command_wrapper, ) from dlt.cli import debug +from dlt.cli.echo import maybe_no_stdin ACTION_EXECUTED = False @@ -190,7 +190,9 @@ def main() -> int: if cmd := installed_commands.get(args.command): try: - cmd.execute(args) + # switch to non-interactive if tty not connected + with maybe_no_stdin(): + cmd.execute(args) except Exception as ex: docs_url = cmd.docs_url if hasattr(cmd, "docs_url") else DEFAULT_DOCS_URL error_code = -1 diff --git a/dlt/cli/echo.py b/dlt/cli/echo.py index e64dbf02a..678ff821f 100644 --- a/dlt/cli/echo.py +++ b/dlt/cli/echo.py @@ -1,5 +1,6 @@ +import sys import contextlib -from typing import Any, Iterable, Iterator, Optional +from typing import Any, Iterable, Iterator, Optional, ContextManager import click @@ -40,6 +41,13 @@ def suppress_echo() -> Iterator[None]: error, warning, note = original_error, original_warning, original_note +def maybe_no_stdin() -> ContextManager[None]: + """Automatically choose default values if stdin not connected""" + return always_choose( + True if not sys.stdin.isatty() else ALWAYS_CHOOSE_DEFAULT, ALWAYS_CHOOSE_VALUE + ) + + echo = click.echo secho = click.secho style = click.style diff --git a/dlt/cli/telemetry_command.py b/dlt/cli/telemetry_command.py index e5e7b41e1..da8e13396 100644 --- a/dlt/cli/telemetry_command.py +++ b/dlt/cli/telemetry_command.py @@ -1,5 +1,4 @@ import os -import tomlkit from dlt.common.configuration.container import Container from dlt.common.configuration.providers.toml import ConfigTomlProvider diff --git a/dlt/common/runtime/anon_tracker.py b/dlt/common/runtime/anon_tracker.py index b247d35e3..77b6cef27 100644 --- a/dlt/common/runtime/anon_tracker.py +++ b/dlt/common/runtime/anon_tracker.py @@ -1,9 +1,10 @@ """dltHub telemetry using using anonymous tracker""" # several code fragments come from https://github.com/RasaHQ/rasa/blob/main/rasa/telemetry.py +import contextlib import os import base64 -from typing import Literal, Optional +from typing import Iterator, Literal, Optional from requests import Session from dlt.common import logger @@ -100,6 +101,22 @@ def before_send(event: DictStrAny) -> Optional[DictStrAny]: return event +@contextlib.contextmanager +def always_track() -> Iterator[None]: + # if anon tracker was disabled + disable_after = _ANON_TRACKER_ENDPOINT is None + if disable_after: + from dlt.common.configuration.container import Container + from dlt.common.configuration.specs.pluggable_run_context import PluggableRunContext + + init_anon_tracker(Container()[PluggableRunContext].runtime_config) + try: + yield + finally: + if disable_after: + disable_anon_tracker() + + def _tracker_request_header(write_key: str) -> StrAny: """Use a segment write key to create authentication headers for the segment API. 
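The `always_track` context manager added above temporarily initializes the anonymous tracker when it is disabled and disables it again on exit. A minimal sketch of how it could be combined with the module's `track` helper to send a single event while telemetry is off; the `track(event_category, event_name, properties)` signature and the event names used here are assumptions, not taken from this patch:

```py
from dlt.common.runtime.anon_tracker import always_track, track

# emit one anonymous event even if telemetry is currently disabled; if the tracker
# was off before entering the block, it is disabled again when the block exits
with always_track():
    track("command", "example_event", {"success": True})
```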
@@ -192,20 +209,25 @@ def _send_event(event_name: str, properties: StrAny, context: StrAny) -> None: headers = _tracker_request_header(_WRITE_KEY) def _future_send() -> None: - # import time - # start_ts = time.time_ns() + import time + + start_ts = time.time_ns() resp = requests.post( _ANON_TRACKER_ENDPOINT, headers=headers, json=payload, timeout=_REQUEST_TIMEOUT ) - # end_ts = time.time_ns() - # elapsed_time = (end_ts - start_ts) / 10e6 + end_ts = time.time_ns() + elapsed_time = (end_ts - start_ts) / 10e6 # print(f"SENDING TO TRACKER done: {elapsed_time}ms Status: {resp.status_code}") # handle different failure cases if resp.status_code not in [200, 204]: logger.debug( - f"Tracker request returned a {resp.status_code} response. Body: {resp.text}" + f"Tracker request returned a {resp.status_code} response in {elapsed_time}ms. Body:" + f" {resp.text}" ) else: + logger.debug( + f"Tracker request returned a {resp.status_code} response in {elapsed_time}ms.s" + ) if resp.status_code == 200: # parse the response if available data = resp.json() diff --git a/dlt/common/runtime/telemetry.py b/dlt/common/runtime/telemetry.py index db4a74b07..c474ea8ef 100644 --- a/dlt/common/runtime/telemetry.py +++ b/dlt/common/runtime/telemetry.py @@ -22,7 +22,7 @@ def start_telemetry(config: RuntimeConfiguration) -> None: # enable telemetry only once global _TELEMETRY_STARTED - if _TELEMETRY_STARTED: + if is_telemetry_started(): return if config.sentry_dsn: @@ -46,8 +46,7 @@ def start_telemetry(config: RuntimeConfiguration) -> None: @atexit.register def stop_telemetry() -> None: - global _TELEMETRY_STARTED - if not _TELEMETRY_STARTED: + if not is_telemetry_started(): return try: @@ -63,6 +62,7 @@ def stop_telemetry() -> None: disable_platform_tracker() + global _TELEMETRY_STARTED _TELEMETRY_STARTED = False diff --git a/dlt/common/utils.py b/dlt/common/utils.py index 311d2c967..4d9859385 100644 --- a/dlt/common/utils.py +++ b/dlt/common/utils.py @@ -26,6 +26,7 @@ from typing import ( Dict, MutableMapping, Iterator, + Generator, Optional, Sequence, Set, @@ -48,6 +49,7 @@ from dlt.common.exceptions import ( ValueErrorWithKnownValues, ) from dlt.common.typing import AnyFun, StrAny, DictStrAny, StrStr, TAny, TFun, Generic +from dlt.common.warnings import Dlt100DeprecationWarning, deprecated T = TypeVar("T") @@ -162,7 +164,9 @@ def flatten_list_of_str_or_dicts(seq: Sequence[Union[StrAny, str]]) -> DictStrAn return o -def flatten_list_or_items(_iter: Union[Iterable[TAny], Iterable[List[TAny]]]) -> Iterator[TAny]: +def flatten_list_or_items( + _iter: Union[Iterable[TAny], Iterable[List[TAny]]] +) -> Generator[TAny, None, None]: for items in _iter: if isinstance(items, List): yield from items @@ -327,8 +331,9 @@ def map_nested_values_in_place( # keep old name for backwards compatibility -# dlt+ needs to be updated -map_nested_in_place = map_nested_values_in_place +map_nested_in_place = deprecated( + "Use `map_nested_values_in_place` instead.", category=Dlt100DeprecationWarning +)(map_nested_values_in_place) def map_nested_keys_in_place( diff --git a/dlt/extract/resource.py b/dlt/extract/resource.py index 9846f08a6..50f517ea8 100644 --- a/dlt/extract/resource.py +++ b/dlt/extract/resource.py @@ -7,6 +7,7 @@ from typing import ( Callable, Iterable, Iterator, + Generator, Type, Union, Any, @@ -615,7 +616,7 @@ class DltResource(Iterable[TDataItem], DltResourceHints): self.pipe_data_from(self.from_data(data, name="iter_" + uniq_id(4))) return self - def __iter__(self) -> Iterator[TDataItem]: + def __iter__(self) -> 
Generator[TDataItem, None, None]: """Opens iterator that yields the data items from the resources in the same order as in Pipeline class. A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 7b4dbdc2a..bc4c2b975 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -6,7 +6,7 @@ from typing import ( ClassVar, Dict, Iterable, - Iterator, + Generator, List, Optional, Sequence, @@ -186,7 +186,7 @@ class DltResourceDict(Dict[str, DltResource]): ) @contextlib.contextmanager - def _add_multiple_resources(self) -> Iterator[None]: + def _add_multiple_resources(self) -> Generator[TDataItem, None, None]: # temporarily block cloning when single resource is added try: self._suppress_clone_on_setitem = True @@ -596,7 +596,7 @@ class DltSource(Iterable[TDataItem]): self.schema.clone(with_name=with_name), self.section, list(self._resources.values()) ) - def __iter__(self) -> Iterator[TDataItem]: + def __iter__(self) -> Generator[TDataItem, None, None]: """Opens iterator that yields the data items from all the resources within the source in the same order as in Pipeline class. A read-only state is provided, initialized from active pipeline state. The state is discarded after the iterator is closed. diff --git a/dlt/hub.py b/dlt/hub.py new file mode 100644 index 000000000..05e16cabd --- /dev/null +++ b/dlt/hub.py @@ -0,0 +1,8 @@ +"""A collection of dltHub Features""" + +try: + from dlt_plus import transformation + + __all__ = ["transformation"] +except ImportError: + pass diff --git a/docs/website/docs/dlt-ecosystem/destinations/databricks.md b/docs/website/docs/dlt-ecosystem/destinations/databricks.md index 97cc753a6..b56e60b21 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/databricks.md +++ b/docs/website/docs/dlt-ecosystem/destinations/databricks.md @@ -17,7 +17,7 @@ There are two options to run dlt pipelines and load data: * Run dlt pipelines directly within [Databricks notebooks](#direct-load-databricks-managed-volumes) without explicitly providing credentials. :::note -If you'd like to load data to Databricks Managed Iceberg tables, use [dlt+ Iceberg destination](../../plus/ecosystem/iceberg#unity-catalog) +If you'd like to load data to Databricks Managed Iceberg tables, use [dltHub Iceberg destination](https://info.dlthub.com/waiting-list) ::: ## Install dlt with Databricks diff --git a/docs/website/docs/dlt-ecosystem/destinations/iceberg.md b/docs/website/docs/dlt-ecosystem/destinations/iceberg.md index 1220c6366..cbd912359 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/iceberg.md +++ b/docs/website/docs/dlt-ecosystem/destinations/iceberg.md @@ -20,8 +20,8 @@ While ephemeral catalogs make it easy to get started with Iceberg, it comes with - the latest manifest file needs to be searched for using file listing—this can become slow with large tables, especially in cloud object stores ::: -:::tip dlt+ -If you're interested in a multi-user cloud experience and integration with vendor catalogs, such as Polaris or Unity Catalog, check out [dlt+](../../plus/ecosystem/iceberg.md). +:::tip dltHub Features +If you're interested in a multi-user cloud experience and integration with vendor catalogs, such as Polaris or Unity Catalog, check out [dltHub Iceberg destination](https://info.dlthub.com/waiting-list). 
::: ## Iceberg dependencies diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md index 703e31a3b..72e7e935e 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md @@ -6,10 +6,6 @@ keywords: [transform, dbt, runner] # Transform data with dbt -:::tip dlt+ -If you want to generate your dbt models automatically, check out [dlt+](../../../plus/features/transformations/dbt-transformations.md). -::: - [dbt](https://github.com/dbt-labs/dbt-core) is a framework that allows for the simple structuring of your transformations into DAGs. The benefits of using dbt include: - End-to-end cross-db compatibility for dlt→dbt pipelines. diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md index aeeb59fe4..30228f7af 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md @@ -6,10 +6,6 @@ keywords: [transform, sql] # dbt Cloud client and helper functions -:::tip dlt+ -If you want to generate your dbt models automatically, check out [dlt+](../../../plus/features/transformations/dbt-transformations.md). -::: - ## API client The dbt Cloud Client is a Python class designed to interact with the dbt Cloud API (version 2). diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md index 2369bff9c..7ad3bc7b5 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md @@ -27,8 +27,8 @@ If you prefer to skip the tutorial and see the code example right away, check ou ### Supported databases -:::tip dlt+ -Check out [dlt+](../../../plus/ecosystem/ms-sql.md) for support of change tracking for Microsoft SQL Server. +:::tip dltHub Features +Check out [dltHub Features](https://info.dlthub.com/waiting-list) for support of change tracking for Microsoft SQL Server. ::: We support all [SQLAlchemy dialects](https://docs.sqlalchemy.org/en/20/dialects/), which include, but are not limited to, the following database engines: diff --git a/docs/website/docs/general-usage/credentials/setup.md b/docs/website/docs/general-usage/credentials/setup.md index 95fa3894b..1ed383546 100644 --- a/docs/website/docs/general-usage/credentials/setup.md +++ b/docs/website/docs/general-usage/credentials/setup.md @@ -10,10 +10,6 @@ files or secure vaults. It understands both simple and verbose layouts of [confi ## Choose where to store configuration -:::tip dlt+ -To define your configuration (including sources, destinations, pipeline and parameters) in a declarative way using YAML files, check out [dlt+](../../plus/features/projects.md). -::: - `dlt` looks for configuration and secrets in various locations (environment variables, toml files or secure vaults) through **config providers** that are queried when your pipeline runs. You can pick a single location or combine them - for example, define secret `api_key` in environment variables and `api_url` in a TOML file. Providers are queried in the following order: 1. [Environment Variables](#environment-variables): If a value is found in an environment variable, `dlt` uses it and doesn't check lower-priority providers. 
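A minimal sketch of the combination mentioned above (an `api_key` secret from an environment variable plus an `api_url` from a TOML file), for a hypothetical source named `my_source`; the section and key names are illustrative only:

```sh
# environment variables have the highest priority; sections are separated with double underscores
export SOURCES__MY_SOURCE__API_KEY="<your api key>"
```

```toml
# .dlt/config.toml (queried after environment variables)
[sources.my_source]
api_url = "https://api.example.com"
```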
diff --git a/docs/website/docs/hub/EULA.md b/docs/website/docs/hub/EULA.md new file mode 100644 index 000000000..9930adf1a --- /dev/null +++ b/docs/website/docs/hub/EULA.md @@ -0,0 +1,187 @@ +# dltHub Software EULA +Latest Update: 2024-07-30 + +READ THIS dltHUB SOFTWARE END USER LICENSE AGREEMENT CAREFULLY. IT CONSTITUTES A LEGALLY BINDING AGREEMENT AND GOVERNS YOUR USE OF dltHub’S SOFTWARE (DEFINED BELOW). BY INSTALLING AND/OR USING SUCH SOFTWARE, YOU ARE INDICATING THAT YOU AGREE TO THE TERMS AND CONDITIONS SET FORTH IN THIS AGREEMENT. IF YOU DO NOT AGREE WITH SUCH TERMS AND CONDITIONS, YOU MAY NOT INSTALL OR USE ANY OF THE SOFTWARE. IF YOU ARE INSTALLING OR USING THE SOFTWARE ON BEHALF OF YOUR EMPLOYER OR ANOTHER ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS ON BEHALF OF SUCH EMPLOYER OR OTHER ENTITY. YOU FURTHER AGREE THAT dltHub MAY DIRECTLY ENFORCE THIS AGREEMENT AGAINST YOU IN THE CASE OF YOUR BREACH OF THIS END USER LICENSE AGREEMENT, AND YOU WAIVE ANY OBJECTION REGARDING dltHub’S STANDING TO DO SO. + +The End User License Agreement (“EULA”) applies to your access, use, download or installation of dltHub Software, including the Python library software that you use in your code to create User Software and Products. + +In this EULA, we use the expression “dltHub Software” or “Software” to mean any software, algorithms, applications, and programs that are licensed by dltHub to implement, run, write, manage data processing pipelines that extract, load and transform data (User Software and Products), including any Application Programming Interface (“API”), software development kits (“SDK”), libraries, code and programs, dlt OSS plugins which is licensed to you by dltHub, or otherwise made available to you. + +The EULA is between you, the person who accesses, downloads, installs, configures, or uses the dltHub Software (“you”, “your”, “User”) and ScaleVector GmbH, Rosenthaler Str. 42, 10178 Berlin (“dltHub”, “we”, “us”). + +To use the dltHub Software, you must accept the EULA. If you don’t agree with the EULA or any changes that we make to them, you must cease to use the dltHub Software (as defined below). The EULA is effective as soon as you accept them, including by downloading, executing, or installing the dltHub Software, and continues in full force for as long as you are using the dltHub Software. + +# Arbitration Notice + +The EULA contains a mandatory arbitration provision, as well as a class action waiver. By agreeing to the EULA, you also agree to resolve any disputes through individual arbitration, and you waive your right to submit a dispute to a judge or jury, or to participate in a class action, class arbitration, or representative action. + +However, if you’re based within the European Union, the above does not prevent you from bringing any action in justice before a court of law or any other governmental authority, to the extent you’re entitled to do so based on any statute applicable to you. + +## Contact Us + +If you have any questions on the EULA, or if you would like to provide us with any legal notices, you can reach out to us at legal@dlthub.com, or by mail at: ScaleVector GmbH, Rosenthaler Str. 42, 10178 Berlin + +Any notice that we send to you will be considered effective once sent to your email address or posted in the dltHub Software. Any notice to us will only be effective once received by our Legal Counsel. 
+ +# Definitions + +Unless defined elsewhere in this EULA, the terms capitalized are defined below: + +* “dltHub Software” all of the software made available to you by dltHub, including dlt OSS. +* “dlt OSS” all of the software that is a part of dltHub Software and distributed under Open Source License. This software includes but is not limited to dlt Python library found at [https://github.com/dlt-hub/dlt](https://github.com/dlt-hub/dlt). +* “User Software” is any source code, libraries, applications and programs that integrates dltHub Software in the source code created by the User. +* “User Product” is any software, application, program or service created by the User that relies on dltHub Software in anyform, including command line interface and API calls and deployment packages. +* “Commercial Terms” means the commercial terms applicable between us and Customer regarding the dltHub Software and Services or any other custom commercial agreement that dltHub has negotiated with a Customer. +* “Customer” means an entity that provides you with access to the dltHub Software pursuant to the Commercial Terms. +* “Documentation” means the documentation that we make available online to describe the dltHub Software. +* “IP” means all rights, titles and interests in intellectual and industrial properties, including, without limitation, (a) all patents and applications; (b) all inventions, trade secrets, designs, methods, processes and know-how; (c) all copyrights, copyright registrations and applications, therefore, and all other rights corresponding throughout the world; (d) any and all trade names, corporate names, logos, common law, trademarks, trademark registrations and applications, therefore, and (e) all computer programs, applications or software whether in sources, object or executable code, and any proprietary rights in such programs, applications or software, including documentation and other materials related thereto. +* “License File” is a cryptographically signed text that enables dltHub Software to execute. License Files are created and distributed by us and contain an unique identifier that we assigned to you and encoded Special Terms including Trial expiration period. +* “Losses” means claims, penalties, fees, damages, fines, costs and expenses, including reasonable attorneys’ fees. +* “Representatives” means our affiliates, subsidiaries, employees, directors, officers, licensors, and service providers. +* “Services” means services that we provide to our customers, such as training and optimization of data processing pipelines, deployment and installation services, configuration, and technical support. +* “Third-Party Services” means any third-party software, application, website, technology, service, and product, including those connected or integrated to the dltHub Software, such as by way of APIs or SDKs (an “Integration”). + +# dltHub Software License + +Subject to this EULA, we grant you a non-exclusive, non-transferable, revocable, and non-sublicensable right and license to access, download, install, and use the dltHub Software (the “dltHub Software License”). For the avoidance of doubt, the dltHub Software License does not apply in cases where the OSS License is applicable to the OSS dlt + +Nothing in the dltHub Software License allows you to modify, distribute, resell, or sell the dltHub Software unless we specifically indicate otherwise in the Documentation. 
You may not decompile, disassemble, reverse engineer, modify, attempt to disable or circumvent any License File or copy protection in the Software, unless you’re permitted to do so by mandatory law. + +You are free to inspect, decompile or view source code if it is distributed with the Software. Consult Documentation for supported methods of accessing the source code. + +You are not permitted to distribute any derivative works from the decompiled or disassembled dltHub Software, except if permitted in Specific Terms. + +We reserve our rights to limit, condition, and modify the terms and conditions relating to the dltHub Software License at our convenience, upon prior reasonable written notice. + +## Specific Terms + +Notwithstanding anything to the contrary, in case of a conflict between the EULA and the specific terms and conditions applicable to the dltHub Software (the “Specific Terms”) the Specific Terms will prevail over the EULA. + +Without limiting the generality of the foregoing, the OSS Licenses are separate agreements. If you use OSS, you must agree with the OSS License attached to the OSS and comply with the requirements of the OSS License, including any disclaimers and attribution notices. For the avoidance of doubts, the EULA applies to the extent that it does not contradict the OSS License. The OSS Licenses are considered Specific Terms. + +## Specific Terms for the Self-Issued Trial License (“Self-Issued Trial Terms”) +If you select or generate a Self-Issued Trial License, the following Specific Terms apply and supplement the Agreement. Capitalized terms not defined here have the meanings in the Agreement. + +### No Production Use +Subject to your acceptance of these Terms, dltHub grants you a limited, non-exclusive, non-transferable, revocable license to use the dltHub Software solely for internal evaluation, development, testing and continuous integration in a non-production environment ("Development Mode"). + +Production Use is not permitted under these Terms. “Production Use” means any use (a) in an environment designated as "production" (b) with live or customer data; (c) in systems that are customer-facing or externally accessible; (c) for revenue-generating or operational workloads; + +Any Production Use or other use outside this grant is outside the scope of the license and constitutes copyright infringement and breach of the Agreement. Upon such use, this Self-Issued Trial License terminates automatically and without notice. + +### Self-Issuance; Acceptance; Runtime Notices +The Software may provide a command-line or in-product flow that allows you to self-issue a trial license file locally ("License File"). By generating or using a License File, you affirmatively accept these Self-Issued Trial Terms. + +The Software may display conspicuous notices (e.g., console messages or headers) indicating Development Mode. You must not remove or suppress such notices. + +### Environment Binding; Anonymous Issuance +The Trial License File is bound to your current local development environment using an anonymous, locally-generated identifier (e.g., a random string/UUID created on first run) stored on your machine. + +The identifier is not intended to contain personal data. + +The Trial License File is not transferable and may not be copied for use on other machines or environments. If your environment changes (e.g., VM migration or rebuild), you must self-issue a new Trial License File. 
+ +Where the Self-Issued Trial is obtained without providing contact information (“Anonymous Issuance”), the prior written notice requirement in the Agreement’s “Changes” section does not apply to you. + +## Specific Terms for the Trial License (“Trial Terms”) + +If you selected Trial License type, the following Specific Terms apply: + +* dltHub Software will function until an expiration date encoded in the License File +* dltHub Software will expire automatically on the expiration date. +* In the event the License FIle expires, all licensed rights expire. +* We will not be liable for any Losses, and you will indemnify us from any Losses, resulting from you attempting to use dltHub Software past the expiration date. + +We do not restrict the scope of use and number and types of the installations of dltHub Software. + +License files contain a unique random identifier known to dltHub and associated with the license subject. Consult your Documentation on how to inspect your license file. + +## Specific Terms for the Commercial License (“Commercial Terms”) + +If you selected a Commercial License or you were provided such license by your organization additional Specific Terms may apply. + +License files contain a unique random identifier known to dltHub and associated with the license subject. Consult your Documentation on how to inspect your license file. + +# Our Marks + +Unless we specifically state otherwise, the dltHub Software License that is granted to you does not include any license or rights to our Marks. We reserve all our rights in our Marks, except as indicated otherwise. You agree not to display or use our Marks in any manner, including in connection with your use of User Software and Products, without our prior written consent. You will not remove any attribution or copyright notices in the EULA, including any Marks thereof. + +# Intellectual Property (IP) + +The dltHub Software and the related Documentation are licensed and not sold to you. Except as set forth otherwise, the dltHub Software, including the Documentation, is our IP. All rights not granted herein are reserved. + +Unless otherwise stated in the Specific Terms, any modifications, enhancements, or derivative works from the decompiled, disassembled or reverse engineered dltHub Software created by you or on your behalf, whether independently or in conjunction with dltHub, are the exclusive property of dltHub. Notwithstanding the foregoing, if the dltHub Software that you are using is subject to an OSS Licence that permits modifications and derivative works, those terms will take precedence. + +You agree to not remove any attribution or copyright notices, including in the dltHub Software and its Documentation. + +You may provide or we may ask you to provide suggestions, comments, input or other feedback (“Feedback”) regarding dltHub Software. If you provide us with any Feedback, then you grant us a perpetual, irrevocable, royalty-free, non-exclusive, worldwide, sublicensable, and transferable license to use, reproduce, publicly display, distribute, modify, and publicly perform the Feedback as we see fit. Any Feedback you choose to provide is given entirely voluntarily. You understand that you will not receive any compensation for your Feedback. We may use any Feedback you provide to improve the dltHub Software, or to create any products and services. 
+ +# Maintenance of the dltHub Software + +We do not guarantee and are subject to no obligation to maintain current dltHub Software in its current conditions and availability. We may decide at all times to discontinue the provision of dltHub Software and this shall not entitle you to exercise any damage claims or other claims. We further have no obligation to issue updates, improvements or new versions to the dltHub Software (each a “New Version”). However, if we issue a New Version, we may cease to support previous versions, including by issuing security patches only for New Versions. You are solely responsible for installing or upgrading to New Versions, and we won’t be liable for any Losses if you fail to do so. + +# Disclaimers + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, AND EXCEPT AS EXPRESSLY STATED OTHERWISE IN THE EULA, THE dltHUB SOFTWARE IS PROVIDED TO YOU “AS IS” AND “AS AVAILABLE”: NEITHER US NOR OUR REPRESENTATIVES PROVIDE WARRANTIES, CONDITIONS, OR UNDERTAKINGS OF ANY KIND. THIS INCLUDES BUT ISN’T LIMITED TO, WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, DATA LOSS, MERCHANTABILITY, OR NON-INFRINGEMENT. + +Neither we nor our Representatives are responsible for Third-Party Services, including any Integrations. We do not represent nor warrant that Integrations will be possible, nor that the dltHub Software will be compatible with any Third-Party Services. Your use of any Third-Party Services is at your own risk, and you are responsible for (a) ensuring that you comply with their terms and conditions; (b) validating the security and privacy practices of Third-Party Services and (c) entering into appropriate terms and conditions with Third-Party Services. We make no warranties of any kind and assume no liability for any Losses resulting from Third-Party Services. We do not endorse any Third-Party Services. + +Without prejudice or limitation to the above, we won’t be held liable for any Losses resulting from any cause beyond our control. This includes but is not limited to, force majeure, changes to law or regulations, embargoes, war, terrorist acts, riots, fires, earthquakes, nuclear accidents, floods, strikes, power blackouts, volcanic action, zero-day attacks, distributed denial of services attacks, unusually severe weather conditions, and acts of hackers or third-party internet service providers. + +# Remedies + +In addition to any remedies in this EULA, you agree to indemnify and hold us and our Representatives harmless from any third-party claims for Losses resulting from your breach of this EULA. + +Your violation of this EULA may cause irreparable harm to us and our Representatives. Therefore, we have the right to seek injunctive relief or other equitable relief if you violate the EULA (meaning we may request a court order to stop you). + +# Limitation of Liability + +To the maximum extent permitted by law, you agree and acknowledge that neither dltHub nor our Representatives have any liability to you whatsoever (a) for indirect, consequential, punitive, special, or consequential damage, including any loss of data, profits, revenues, business opportunities, goodwill, or anticipated savings under any circumstances, even if based on negligence; (b) for any other Losses whatsoever that you incur from the use of the dltHub Software, or otherwise in connection with this EULA. 
+ +For the avoidance of doubt, in no instance, we or our Representatives will be liable for any Losses you suffer if you use the dltHub Software in violation of the EULA, regardless of whether we terminate or suspend your account due to such violation. + +# Suspension; Termination + +You can terminate the EULA at any time by (a) discontinuing your use of the dltHub Software and (b) uninstalling the dltHub Software. + +Unless we agree otherwise with the Customer with whom you are associated, (a) we can terminate this EULA upon written notice to you, (b) we can suspend your right to access and use the dltHub Software if we reasonably suspect that you are in breach of the EULA. + +When the EULA is terminated for any reasons, all licenses and rights granted herein, or in any Specific Terms, are hereby terminated, unless specifically mentioned otherwise. You agree to discontinue the use of the dltHub Software, and uninstall the dltHub Software. All terms which by their nature should survive, will survive the termination. This includes any disclaimers and limitations of liability. + +# Governing Laws and Jurisdictions + +The laws of Germany will apply to the interpretation of the EULA (without regard to conflict of law rules or principles), and subject to the provisions below, you agree that disputes will exclusively be resolved in the competent courts of Berlin. If you’re entering into this EULA acting as a consumer, the preceding choice of venue of jurisdiction shall not apply. + +# Mandatory Arbitration + +Unless you’re located within the European Union, and in any case to the extent permitted under the law applicable in your case, all disputes arising out of or in connection with the present contract shall be finally settled under the Rules of Arbitration of the International Chamber of Commerce (“ICC”) by one arbitrator appointed in accordance with the said Rules, using ICC Case Connect, ICC’s digital case management platform connecting parties, arbitral tribunals, and the ICC Secretariat. The arbitration will occur in English, in the location in which you would resolve disputes under the courts of law as above stated. No award or procedural order made in the arbitration shall be published. + +## Class Action Waiver + +For the avoidance of doubts, the parties waive any right to assert any claims against the other party as a representative or member in any class or representative action, except where such waiver is prohibited by law or deemed by a court of law to be against public policy. + +# Changes + +Although we might have agreed to restrictions in Commercial Terms, as between you and us, you agree and understand that we can change this EULA from time to time by providing you with a reasonable prior written notice. If you don’t agree with these changes, you must cease to use the dltHub Software. + +We can also make changes to the dltHub Software from time to time, with, or without warning. If we believe that these changes can affect your User Software and Products, we will attempt to provide you with a prior written notice prior to making the changes. + +If we update this EULA, we will update the “Latest Update” date above to reflect the latest version. 
+ +# Export Control + +Use of the dltHub Software for any dealings, engagement, or sale of goods/services linked directly or indirectly with jurisdictions that dltHub has deemed high risk, such as Cuba, Iran, North Korea, Syria, and the Crimea, Donetsk, and Luhansk Regions; or persons dltHub has deemed high risk, such as those individuals or entities named to a restricted person or party list of the United States of America, European Union or United Nations, including the sanctions lists maintained by the U.S. Office of Foreign Assets Control or the Denied Persons List or Entity List maintained by the U.S. Department of Commerce, is prohibited. + +It is prohibited to use our products and services to export directly or indirectly, re-export, sell, or supply accounting, trust or corporate formation, or management consulting services to any persons located in the Russian Federation. + +To the extent applicable to you, you must comply with (a) all applicable laws, including those relating to export control; (b) with the sanctions programs administered by the Office of Foreign Assets Control of the U.S. Department of the Treasury and (c) you will not directly or indirectly export, re-export, or transfer the dltHub Software, the Content or the Documentation to prohibited countries or individuals or permit their use by prohibited countries or individuals. + +# Interpretation + +You may not assign any of your rights under this EULA to anyone else. We may assign this EULA, and any of our rights and obligations hereunder to any other individual or entity at our discretion. + +If it turns out that a section of this EULA isn’t enforceable, then that section will be removed or edited as little as required, and the rest of the EULA will still be valid. The headers and sidebar text are provided only to make this EULA easier to read and understand. The fact that we wrote this EULA won’t affect the way the EULA is interpreted. If we don’t immediately take action on a violation of this EULA, we’re not giving up any rights under the Terms, and we may still take action at some point. A waiver on one occasion will not be a waiver of any right or remedy of any future occasion. + +You and we agree that no joint venture, partnership, employment, or agency relationship exists between us. + +The EULA, along with any Specific Terms, make up the entire agreement between us in relation to its subject matter and supersede all prior agreements, representations, and understandings. 
\ No newline at end of file diff --git a/docs/website/docs/plus/core-concepts/cache.md b/docs/website/docs/hub/core-concepts/cache.md similarity index 100% rename from docs/website/docs/plus/core-concepts/cache.md rename to docs/website/docs/hub/core-concepts/cache.md diff --git a/docs/website/docs/plus/core-concepts/datasets.md b/docs/website/docs/hub/core-concepts/datasets.md similarity index 100% rename from docs/website/docs/plus/core-concepts/datasets.md rename to docs/website/docs/hub/core-concepts/datasets.md diff --git a/docs/website/docs/plus/core-concepts/profiles.md b/docs/website/docs/hub/core-concepts/profiles.md similarity index 100% rename from docs/website/docs/plus/core-concepts/profiles.md rename to docs/website/docs/hub/core-concepts/profiles.md diff --git a/docs/website/docs/plus/core-concepts/project.md b/docs/website/docs/hub/core-concepts/project.md similarity index 100% rename from docs/website/docs/plus/core-concepts/project.md rename to docs/website/docs/hub/core-concepts/project.md diff --git a/docs/website/docs/plus/ecosystem/delta.md b/docs/website/docs/hub/ecosystem/delta.md similarity index 100% rename from docs/website/docs/plus/ecosystem/delta.md rename to docs/website/docs/hub/ecosystem/delta.md diff --git a/docs/website/docs/plus/ecosystem/iceberg.md b/docs/website/docs/hub/ecosystem/iceberg.md similarity index 100% rename from docs/website/docs/plus/ecosystem/iceberg.md rename to docs/website/docs/hub/ecosystem/iceberg.md diff --git a/docs/website/docs/plus/ecosystem/ms-sql.md b/docs/website/docs/hub/ecosystem/ms-sql.md similarity index 99% rename from docs/website/docs/plus/ecosystem/ms-sql.md rename to docs/website/docs/hub/ecosystem/ms-sql.md index e51dcd396..d33e5a5da 100644 --- a/docs/website/docs/plus/ecosystem/ms-sql.md +++ b/docs/website/docs/hub/ecosystem/ms-sql.md @@ -62,7 +62,7 @@ This approach ensures that you have a complete dataset from the initial load and Get the Change Tracking version **before you execute the initial load** to make sure you do not miss any updates that may happen during it. This may result in "replaying" a few changes that happen during the load, but this will not have any impact on the destination data due to the `merge` write disposition. 
```py -from dlt_plus.sources.mssql import get_current_change_tracking_version +from dlt.hub.sources.mssql import get_current_change_tracking_version from sqlalchemy import create_engine connection_url = "mssql+pyodbc://username:password@your_server:port/YourDatabaseName?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes" diff --git a/docs/website/docs/plus/ecosystem/snowflake_plus.md b/docs/website/docs/hub/ecosystem/snowflake_plus.md similarity index 100% rename from docs/website/docs/plus/ecosystem/snowflake_plus.md rename to docs/website/docs/hub/ecosystem/snowflake_plus.md diff --git a/docs/website/docs/plus/features/ai.md b/docs/website/docs/hub/features/ai.md similarity index 100% rename from docs/website/docs/plus/features/ai.md rename to docs/website/docs/hub/features/ai.md diff --git a/docs/website/docs/plus/features/data-access.md b/docs/website/docs/hub/features/data-access.md similarity index 100% rename from docs/website/docs/plus/features/data-access.md rename to docs/website/docs/hub/features/data-access.md diff --git a/docs/website/docs/dlt-ecosystem/llm-tooling/mcp-server.md b/docs/website/docs/hub/features/mcp-server.md similarity index 95% rename from docs/website/docs/dlt-ecosystem/llm-tooling/mcp-server.md rename to docs/website/docs/hub/features/mcp-server.md index ea20bfee1..5bdc8ef08 100644 --- a/docs/website/docs/dlt-ecosystem/llm-tooling/mcp-server.md +++ b/docs/website/docs/hub/features/mcp-server.md @@ -6,7 +6,7 @@ keywords: [mcp, llm, agents, ai] # MCP Server -Currently, dltHub is [building two MCP servers](https://dlthub.com/blog/deep-dive-assistants-mcp-continue) that you can run locally and integrate with your preferred IDE. One server is for the open-source `dlt` library and the other integrates with `dlt+` features ([Learn more](../../plus/features/ai.md)). +Currently, dltHub is [building two MCP servers](https://dlthub.com/blog/deep-dive-assistants-mcp-continue) that you can run locally and integrate with your preferred IDE. One server is for the open-source `dlt` library and the other integrates with `dlt+` features ([Learn more](ai.md)). This page gives an overview of what we're building and includes detailed instructions to install the MCP in your favorite IDE. @@ -86,7 +86,7 @@ Then, to enable the MCP server and tool usage, several IDEs require you to enabl ### dlt+ MCP server -To run the `dlt+` MCP server, you will need to set your [dlt+ License](../../plus/getting-started/installation#licensing) globally in `~/.dlt/secrets.toml` or in an environment variable (must be set before lauching the IDE) and use `dlt mcp run_plus` in your configuration. If the `dlt+` license is missing, the dlt MCP server will be launched instead. You can tell the two apart by the tools, resources, and prompts available­. +To run the `dlt+` MCP server, you will need to set your [dlt+ License](../getting-started/installation#licensing) globally in `~/.dlt/secrets.toml` or in an environment variable (must be set before lauching the IDE) and use `dlt mcp run_plus` in your configuration. If the `dlt+` license is missing, the dlt MCP server will be launched instead. You can tell the two apart by the tools, resources, and prompts available­. ### Continue @@ -144,7 +144,7 @@ There's also a global configuration specs in JSON ### Claude Desktop -You need to [add a JSON configuration file](https://modelcontextprotocol.io/quickstart/user#2-add-the-filesystem-mcp-server) on your system. 
See our [full Claude Desktop tutorial](../../plus/features/ai.md) +You need to [add a JSON configuration file](https://modelcontextprotocol.io/quickstart/user#2-add-the-filesystem-mcp-server) on your system. See our [full Claude Desktop tutorial](ai.md) ```json { diff --git a/docs/website/docs/plus/features/project/index.md b/docs/website/docs/hub/features/project/index.md similarity index 100% rename from docs/website/docs/plus/features/project/index.md rename to docs/website/docs/hub/features/project/index.md diff --git a/docs/website/docs/plus/features/project/overview.md b/docs/website/docs/hub/features/project/overview.md similarity index 100% rename from docs/website/docs/plus/features/project/overview.md rename to docs/website/docs/hub/features/project/overview.md diff --git a/docs/website/docs/plus/features/project/python-api.md b/docs/website/docs/hub/features/project/python-api.md similarity index 100% rename from docs/website/docs/plus/features/project/python-api.md rename to docs/website/docs/hub/features/project/python-api.md diff --git a/docs/website/docs/plus/features/project/source-configuration.md b/docs/website/docs/hub/features/project/source-configuration.md similarity index 100% rename from docs/website/docs/plus/features/project/source-configuration.md rename to docs/website/docs/hub/features/project/source-configuration.md diff --git a/docs/website/docs/plus/features/projects.md b/docs/website/docs/hub/features/projects.md similarity index 100% rename from docs/website/docs/plus/features/projects.md rename to docs/website/docs/hub/features/projects.md diff --git a/docs/website/docs/plus/features/quality/data-quality.md b/docs/website/docs/hub/features/quality/data-quality.md similarity index 100% rename from docs/website/docs/plus/features/quality/data-quality.md rename to docs/website/docs/hub/features/quality/data-quality.md diff --git a/docs/website/docs/plus/features/quality/tests.md b/docs/website/docs/hub/features/quality/tests.md similarity index 100% rename from docs/website/docs/plus/features/quality/tests.md rename to docs/website/docs/hub/features/quality/tests.md diff --git a/docs/website/docs/plus/features/transformations/dbt-transformations.md b/docs/website/docs/hub/features/transformations/dbt-transformations.md similarity index 100% rename from docs/website/docs/plus/features/transformations/dbt-transformations.md rename to docs/website/docs/hub/features/transformations/dbt-transformations.md diff --git a/docs/website/docs/general-usage/transformations/index.md b/docs/website/docs/hub/features/transformations/index.md similarity index 94% rename from docs/website/docs/general-usage/transformations/index.md rename to docs/website/docs/hub/features/transformations/index.md index 410495863..21d93e6ad 100644 --- a/docs/website/docs/general-usage/transformations/index.md +++ b/docs/website/docs/hub/features/transformations/index.md @@ -7,9 +7,9 @@ keywords: [transformation, dataset, sql, pipeline, ibis, arrow] import Admonition from "@theme/Admonition"; -dlt+}> +dltHub}>

- Transformations are part of `dlt+` - a commercial extensions of OSS `dlt`. This module is currently available in 🧪 preview to selected users and projects. + Transformations are part of **dltHub**. This module is currently available in 🧪 preview to selected users and projects. Contact us to get your [trial license](https://dlthub.com/legal/dlt-plus-eula)
[Copyright © 2025 dltHub Inc. All rights reserved.](https://dlthub.com/legal/dlt-plus-eula) @@ -18,7 +18,7 @@ import Admonition from "@theme/Admonition"; `dlt transformations` let you build new tables or full datasets from datasets that have _already_ been ingested with `dlt`. `dlt transformations` are written and run in a very similar fashion to dlt source and resources. `dlt transformations` require you to have loaded data to a location, for example a local duckdb database, a bucket or a warehouse on which the transformations may be executed. `dlt transformations` are fully supported for all of our sql destinations including all filesystem and bucket formats. -You create them with the `@dlt.transformation` decorator which has the same signature as the `@dlt.resource` decorator, but does not yield items but rather a SQL query including the resulting +You create them with the `@dlt.hub.transformation` decorator which has the same signature as the `@dlt.resource` decorator, but does not yield items but rather a SQL query including the resulting column schema. dlt transformations support the same write_dispositions per destination as dlt resources do. ## Motivations @@ -38,7 +38,7 @@ A few real-world scenarios where dlt transformations can be useful: ## Quick-start in three simple steps -For the example below you can copy–paste everything into one script and run it. It is useful to know how to use dlt [Datasets and Relations](../dataset-access/dataset.md), since these are heavily used in transformations. +For the example below you can copy–paste everything into one script and run it. It is useful to know how to use dlt [Datasets and Relations](../../../general-usage/dataset-access/dataset.md), since these are heavily used in transformations. ### 1. Load some example data @@ -49,7 +49,7 @@ The snippets below assume that we have a simple fruitshop dataset as produced by ### 1.1 Use the fruitshop template as a starting point -Alternatively, you can follow the code examples below by creating a new pipeline with the fruitshop template and running transformatth scenarios.ions on the resulting dataset: +Alternatively, you can follow the code examples below by creating a new pipeline with the fruitshop template and running transformations on the resulting dataset: ```sh dlt init fruitshop duckdb @@ -75,14 +75,14 @@ to the correct new schema, and you could also set a different write disposition ## Defining a transformation :::info -Most of the following examples will be using the ibis expressions of the `dlt.Dataset`. Read the detailed [dataset docs](../../general-usage/dataset-access/dataset) to learn how to use these. +Most of the following examples will be using the ibis expressions of the `dlt.Dataset`. Read the detailed [dataset docs](../../../general-usage/dataset-access/dataset.md) to learn how to use these. ::: * **Decorator arguments** mirror those accepted by `@dlt.resource`. * The transformation function signature must contain at least one `dlt.Dataset` which is used inside the function to create the transformation SQL statements and calculate the resulting schema update. -* Yields a `Relation` created with ibis expressions or a select query which will be materialized into the destination table. If the first item yielded is a valid sql query or relation object, data will be interpreted as a transformation. In all other cases, the tranformation decorator will work like any other resource. 
+* Yields a `Relation` created with ibis expressions or a select query which will be materialized into the destination table. If the first item yielded is a valid sql query or relation object, data will be interpreted as a transformation. In all other cases, the transformation decorator will work like any other resource. ## Loading to other datasets @@ -112,13 +112,13 @@ Below we load the data from our local DuckDB instance to a Postgres instance. dl ### Yielding multiple transformations from one transformation resource -`dlt transformations` may also yield more than one transformation instruction. If no further table name hints are supplied, the result will be a union of the yielded transformation instructions. dlt will take care of the necessary schema migrations, you will just need to ensure that no columns are marked as non-nullable that are missing from one of the transformation insturctions: +`dlt transformations` may also yield more than one transformation instruction. If no further table name hints are supplied, the result will be a union of the yielded transformation instructions. dlt will take care of the necessary schema migrations, you will just need to ensure that no columns are marked as non-nullable that are missing from one of the transformation instructions: ### Supplying additional hints -You may supply column and table hints the same way you do for regular resources. `dlt` will derive schema hints from your query, but in some cases you may need to change or amend hints, such as making columsn nullable for the example above or change the precision or type of a column to make it work with a given target destination (if different from the source) +You may supply column and table hints the same way you do for regular resources. `dlt` will derive schema hints from your query, but in some cases you may need to change or amend hints, such as making columns nullable for the example above or change the precision or type of a column to make it work with a given target destination (if different from the source) @@ -195,7 +195,7 @@ Additionally, column names are normalized according to the naming schema selecte This allows dlt to maintain data lineage and enables features like incremental loading and merging, even when working with raw SQL queries. :::info -The normalization described here, including automatic injection or replacement of dlt columns, applies only to SQL-based transformations. Python-based transformations, such as those using dataframes or arrow tables, follow the [regular normalization process](../../reference/explainers/how-dlt-works#normalize). +The normalization described here, including automatic injection or replacement of dlt columns, applies only to SQL-based transformations. Python-based transformations, such as those using dataframes or arrow tables, follow the [regular normalization process](../../../reference/explainers/how-dlt-works.md#normalize). 
::: ### Query Processing diff --git a/docs/website/docs/plus/features/transformations/setup.md b/docs/website/docs/hub/features/transformations/setup.md similarity index 95% rename from docs/website/docs/plus/features/transformations/setup.md rename to docs/website/docs/hub/features/transformations/setup.md index ac74b896a..78b2b0239 100644 --- a/docs/website/docs/plus/features/transformations/setup.md +++ b/docs/website/docs/hub/features/transformations/setup.md @@ -8,7 +8,6 @@ dlt+ provides a powerful mechanism for executing transformations on your data us A transformation consists of functions that modify data stored in a [cache](../../core-concepts/cache.md). These transformations can be implemented using: * [dbt models](./dbt-transformations.md) -* [🧪 Python user-defined functions](./python-transformations.md) By combining a cache and transformations, you can efficiently process data loaded via dlt and move it to a new destination. @@ -87,7 +86,6 @@ dlt transformation render-t-layer This will generate transformation files inside the `./transformations` folder. Depending on the engine: -* For Python transformations: a Python script with transformation functions ([learn more](./python-transformations.md)) * For dbt transformations: dbt models ([learn more](./dbt-transformations.md)) Each generated transformation includes models for managing incremental loading states via `dlt_load_id`. diff --git a/docs/website/docs/general-usage/transformations/transformation-snippets.py b/docs/website/docs/hub/features/transformations/transformation-snippets.py similarity index 95% rename from docs/website/docs/general-usage/transformations/transformation-snippets.py rename to docs/website/docs/hub/features/transformations/transformation-snippets.py index 979420f57..2604d16ba 100644 --- a/docs/website/docs/general-usage/transformations/transformation-snippets.py +++ b/docs/website/docs/hub/features/transformations/transformation-snippets.py @@ -30,7 +30,7 @@ def fruitshop_pipeline() -> dlt.Pipeline: def basic_transformation_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_START basic_transformation - @dlt.transformation() + @dlt.hub.transformation def copied_customers(dataset: dlt.Dataset) -> Any: customers_table = dataset["customers"] yield customers_table.order_by("name").limit(5) @@ -49,7 +49,7 @@ def basic_transformation_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: def orders_per_user_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_START orders_per_user - @dlt.transformation(name="orders_per_user", write_disposition="merge") + @dlt.hub.transformation(name="orders_per_user", write_disposition="merge") def orders_per_user(dataset: dlt.Dataset) -> Any: purchases = dataset.table("purchases", table_type="ibis") yield purchases.group_by(purchases.customer_id).aggregate(order_count=purchases.id.count()) @@ -64,7 +64,7 @@ def loading_to_other_datasets_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: import dlt from dlt.destinations import duckdb - @dlt.transformation() + @dlt.hub.transformation def copied_customers(dataset: dlt.Dataset) -> Any: customers_table = dataset["customers"] yield customers_table.order_by("name").limit(5) @@ -92,13 +92,13 @@ def multiple_transformations_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: @dlt.source def my_transformations(dataset: dlt.Dataset) -> Any: - @dlt.transformation(write_disposition="append") + @dlt.hub.transformation(write_disposition="append") def enriched_purchases(dataset: dlt.Dataset) -> Any: purchases = 
dataset.table("purchases", table_type="ibis") customers = dataset.table("customers", table_type="ibis") yield purchases.join(customers, purchases.customer_id == customers.id) - @dlt.transformation(write_disposition="replace") + @dlt.hub.transformation(write_disposition="replace") def total_items_sold(dataset: dlt.Dataset) -> Any: purchases = dataset.table("purchases", table_type="ibis") yield purchases.aggregate(total_qty=purchases.quantity.sum()) @@ -118,7 +118,7 @@ def multiple_transformation_instructions_snippet(fruitshop_pipeline: dlt.Pipelin import dlt # this (probably nonsensical) transformation will create a union of the customers and purchases tables - @dlt.transformation(write_disposition="append") + @dlt.hub.transformation(write_disposition="append") def union_of_tables(dataset: dlt.Dataset) -> Any: yield dataset.customers yield dataset.purchases @@ -131,7 +131,7 @@ def supply_hints_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: import dlt # change precision and scale of the price column - @dlt.transformation( + @dlt.hub.transformation( write_disposition="append", columns={"price": {"precision": 10, "scale": 2}} ) def precision_change(dataset: dlt.Dataset) -> Any: @@ -151,7 +151,7 @@ def sql_queries_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_START sql_queries # @@@DLT_SNIPPET_START sql_queries_short # Convert the transformation above that selected the first 5 customers to a sql query - @dlt.transformation() + @dlt.hub.transformation def copied_customers(dataset: dlt.Dataset) -> Any: customers_table = dataset(""" SELECT * @@ -164,7 +164,7 @@ def sql_queries_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_END sql_queries_short # Joins and other more complex queries are also possible of course - @dlt.transformation() + @dlt.hub.transformation def enriched_purchases(dataset: dlt.Dataset) -> Any: enriched_purchases = dataset(""" SELECT customers.name, purchases.quantity @@ -176,7 +176,7 @@ def sql_queries_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # You can even use a different dialect than the one used by the destination by supplying the dialect parameter # dlt will compile the query to the right destination dialect - @dlt.transformation() + @dlt.hub.transformation def enriched_purchases_postgres(dataset: dlt.Dataset) -> Any: enriched_purchases = dataset( """ @@ -206,7 +206,7 @@ def sql_queries_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: def arrow_dataframe_operations_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_START arrow_dataframe_operations - @dlt.transformation() + @dlt.hub.transformation def copied_customers(dataset: dlt.Dataset) -> Any: # get full customers table as arrow table customers = dataset.customers.arrow() @@ -218,7 +218,7 @@ def arrow_dataframe_operations_snippet(fruitshop_pipeline: dlt.Pipeline) -> None yield sorted_customers.slice(0, 5) # Example tables (replace with your actual data) - @dlt.transformation() + @dlt.hub.transformation def enriched_purchases(dataset: dlt.Dataset) -> Any: # get both fully tables as dataframes purchases = dataset.purchases.df() @@ -258,7 +258,7 @@ def computed_schema_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: def column_level_lineage_snippet(fruitshop_pipeline: dlt.Pipeline) -> None: # @@@DLT_SNIPPET_START column_level_lineage - @dlt.transformation() + @dlt.hub.transformation def enriched_purchases(dataset: dlt.Dataset) -> Any: enriched_purchases = dataset(""" SELECT customers.name, purchases.quantity @@ -307,7 +307,7 @@ def 
in_transit_transformations_snippet() -> None:
     transit_pipeline.run(source)

     # load aggregated data to a warehouse destination
-    @dlt.transformation()
+    @dlt.hub.transformation
     def orders_per_store(dataset: dlt.Dataset) -> Any:
         orders = dataset.table("orders", table_type="ibis")
         stores = dataset.table("stores", table_type="ibis")
@@ -331,7 +331,7 @@ def incremental_transformations_snippet(fruitshop_pipeline: dlt.Pipeline) -> Non
     # @@@DLT_SNIPPET_START incremental_transformations
     from dlt.pipeline.exceptions import PipelineNeverRan

-    @dlt.transformation(
+    @dlt.hub.transformation(
         write_disposition="append",
         primary_key="id",
     )
@@ -344,7 +344,7 @@ def incremental_transformations_snippet(fruitshop_pipeline: dlt.Pipeline) -> Non
             max_pimary_key_expr = output_dataset.table(
                 "cleaned_customers", table_type="ibis"
             ).id.max()
-            max_pimary_key = output_dataset(max_pimary_key_expr).scalar()
+            max_pimary_key = output_dataset(max_pimary_key_expr).fetchscalar()
         except PipelineNeverRan:
             # we get this exception if the destination dataset has not been run yet
             # so we can assume that all customers are new
diff --git a/docs/website/docs/plus/getting-started/advanced_tutorial.md b/docs/website/docs/hub/getting-started/advanced_tutorial.md
similarity index 100%
rename from docs/website/docs/plus/getting-started/advanced_tutorial.md
rename to docs/website/docs/hub/getting-started/advanced_tutorial.md
diff --git a/docs/website/docs/plus/getting-started/installation.md b/docs/website/docs/hub/getting-started/installation.md
similarity index 100%
rename from docs/website/docs/plus/getting-started/installation.md
rename to docs/website/docs/hub/getting-started/installation.md
diff --git a/docs/website/docs/plus/getting-started/tutorial.md b/docs/website/docs/hub/getting-started/tutorial.md
similarity index 100%
rename from docs/website/docs/plus/getting-started/tutorial.md
rename to docs/website/docs/hub/getting-started/tutorial.md
diff --git a/docs/website/docs/hub/intro.md b/docs/website/docs/hub/intro.md
new file mode 100644
index 000000000..4c2dc5a99
--- /dev/null
+++ b/docs/website/docs/hub/intro.md
@@ -0,0 +1,44 @@
+---
+title: Introduction
+description: Introduction to dlt+
+---
+
+# What is dlt+?
+
+![dlt+](/img/slot-machine-gif.gif)
+
+dlt+ is a commercial extension to the open-source data load tool (dlt). It augments it with a set of features like transformations, data validations,
+Iceberg with full catalog support, and provides a YAML interface to define data platforms. dlt+ features include:
+
+- [@dlt.hub.transformation](features/transformations/index.md) - a powerful Python decorator to build transformation pipelines and notebooks
+- [Project](features/projects.md): a declarative YAML interface that allows any team member to easily define sources, destinations, and pipelines.
+- [dbt transformations](features/transformations/dbt-transformations.md): a staging layer for data transformations, combining a local cache with schema enforcement, debugging tools, and integration with existing data workflows.
+- [Iceberg support](ecosystem/iceberg.md)
+- [Secure data access and sharing](features/data-access.md)
+- [AI workflows](features/ai.md): agents to augment your data engineering team.
+
+To get started with dlt+, install the library using pip (Python 3.9-3.12):
+
+```sh
+pip install dlt-plus
+```
+
+You can try out any feature by self-issuing a trial license. You can use such a license for evaluation, development, and testing.
+Trial licenses are issued offline using the `dlt license` command:
+
+1. 
Display a list of available features
+```sh
+dlt license scopes
+```
+
+2. Issue a license for the feature you want to test.
+
+```sh
+dlt license issue dlthub.transformation
+```
+
+The command above will enable access to the new `@dlt.hub.transformation` decorator. Note that you may
+self-issue licenses several times; the command above will carry over features from previously issued licenses.
+
+3. Do not forget to read our [EULA](EULA.md) and [Special Terms](EULA.md#specific-terms-for-the-self-issued-trial-license-self-issued-trial-terms)
+for self-issued licenses.
diff --git a/docs/website/docs/plus/production/observability.md b/docs/website/docs/hub/production/observability.md
similarity index 100%
rename from docs/website/docs/plus/production/observability.md
rename to docs/website/docs/hub/production/observability.md
diff --git a/docs/website/docs/plus/production/pipeline-runner.md b/docs/website/docs/hub/production/pipeline-runner.md
similarity index 100%
rename from docs/website/docs/plus/production/pipeline-runner.md
rename to docs/website/docs/hub/production/pipeline-runner.md
diff --git a/docs/website/docs/plus/reference.md b/docs/website/docs/hub/reference.md
similarity index 100%
rename from docs/website/docs/plus/reference.md
rename to docs/website/docs/hub/reference.md
diff --git a/docs/website/docs/plus/features/transformations/index.md b/docs/website/docs/plus/features/transformations/index.md
deleted file mode 100644
index 0c7c9b70b..000000000
--- a/docs/website/docs/plus/features/transformations/index.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-title: "Local transformations"
-description: Run local transformations with dlt+ Cache
-keywords: ["dlt+", "transformations", "cache", "dbt"]
----
-import DocCardList from '@theme/DocCardList';
-
-As part of dlt+, we provide a local transformation [cache](../../core-concepts/cache.md) — a staging layer for data transformations allowing you to test, validate, and debug data pipelines without running everything in the warehouse. With local transformations, you can:
-
-* Run transformations locally, eliminating the need to wait for warehouse queries.
-* Validate the schema before loading to catch mismatches early.
-* Test without incurring cloud costs, as in-memory execution prevents wasted compute.
-
-Local transformations are built on DuckDB, Arrow, and dbt, so they work with your existing stack.
-
-:::caution
-The local transformations feature is currently in the early access phase. We recommend waiting for general access before using it in production.
-:::
-
-
-
diff --git a/docs/website/docs/plus/features/transformations/python-transformations.md b/docs/website/docs/plus/features/transformations/python-transformations.md
deleted file mode 100644
index f98179d4a..000000000
--- a/docs/website/docs/plus/features/transformations/python-transformations.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-title: "Python-based transformations 🧪"
-description: Define transformations in Python
----
-
-:::caution
-🚧 This feature is under development, and the interface may change in future releases. Interested in becoming an early tester? [Join dlt+ early access](https://info.dlthub.com/waiting-list).
-:::
-
-dlt+ allows you to define Arrow-based transformations that operate on a [cache](../../core-concepts/cache.md). The actual transformation code is located in the `./transformations` folder.
-In this section, you will learn how you can define Arrow-based transformations with Python.
- -## Generate template - -Since this feature is still under development and documentation is limited, we recommend starting with a template. You can generate one using the following command: - -:::note -Make sure you have [configured your cache and transformation](./setup#configure-dltyml-file) in the `dlt.yml` file before running the command below. -::: - -```sh -dlt transformation render-t-layer -``` - -Running this command will create a new set of transformations inside the `./transformations` folder. The generated template includes: - -* Transformation functions that manage incremental loading state based on `dlt_load_id`. -* Two transformation functions that implement user-defined transformations. -* A staging view, which pre-selects only rows eligible for the current transformation run. -* A main output table, which initially just forwards all incoming rows unchanged. - -If you run the generated transformations without modifying them, the execution will fail. This happens because your cache expects an aggregated table corresponding to the ``, but the newly created transformations do not include it. To resolve this, you can either: - -* Update your cache settings to match the new transformation. -* Implement a transformation that aligns with the expected table structure. - -## Understanding incremental transformations - -The default transformations generated by the scaffolding command work incrementally using the `dlt_load_id` from the incoming dataset. Here's how it works: - -1. The `dlt_loads` table is automatically available in the cache. -2. The transformation layer identifies which `load_id`s exist in the incoming dataset. -3. It selects only those `load_id`s that have not yet been processed (i.e., missing from the `processed_load_ids` table). -4. Once all transformations are complete, the `processed_load_ids` table is updated with the processed `load_id`s. -5. The cache saves the `processed_load_ids` table to the output dataset after each run. -6. When syncing the input dataset, the cache reloads the `processed_load_ids` table from the output dataset (if available). - -This mechanism allows incremental transformations to function seamlessly, even on ephemeral machines, where the cache is not retained between runs. - diff --git a/docs/website/docs/plus/intro.md b/docs/website/docs/plus/intro.md deleted file mode 100644 index 9bc866d28..000000000 --- a/docs/website/docs/plus/intro.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: Introduction -description: Introduction to dlt+ ---- - -# What is dlt+? - -![dlt+](/img/slot-machine-gif.gif) - -dlt+ is a framework for running dlt pipelines in production at scale. It is the commercial extension to the open-source data load tool (dlt). dlt+ features include: - -- [Project](../plus/features/projects.md): a declarative YAML interface that allows any team member to easily define sources, destinations, and pipelines. -- [Local transformations](../plus/features/transformations/index.md): a staging layer for data transformations, combining a local cache with schema enforcement, debugging tools, and integration with existing data workflows. -- [Data quality & tests](../plus/features/quality/tests.md) -- [Iceberg support](../plus/ecosystem/iceberg.md) -- [Secure data access and sharing](../plus/features/data-access.md) -- [AI workflows](../plus/features/ai.md): agents to augment your data engineering team. 
- -To get started with dlt+, install the library using pip (Python 3.9-3.12): - -```sh -pip install dlt-plus -``` - -:::caution -dlt+ requires a license to run. If you would like a trial, please join our [waiting list](https://info.dlthub.com/waiting-list). -::: diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index ffa348388..8c8b40c24 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -27,7 +27,7 @@ function *walkSync(dir) { /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ const sidebars = { - tutorialSidebar: [ + docsSidebar: [ { type: 'category', label: 'Getting started', @@ -97,96 +97,6 @@ const sidebars = { 'general-usage/glossary' ] }, - { - type: 'category', - label: 'dlt+', - link: { - type: 'doc', - id: 'plus/intro', - }, - items: [ - { - type: 'category', - label: 'Getting started', - items: [ - 'plus/getting-started/installation', - 'plus/getting-started/tutorial', - 'plus/getting-started/advanced_tutorial', - ] - }, - { - type: 'category', - label: 'Core concepts', - items: [ - 'plus/core-concepts/project', - 'plus/core-concepts/cache', - 'plus/core-concepts/datasets', - 'plus/core-concepts/profiles', - ] - }, - { - type: 'category', - label: 'Sources & Destinations', - items: [ - 'plus/ecosystem/ms-sql', - 'plus/ecosystem/iceberg', - 'plus/ecosystem/delta', - 'plus/ecosystem/snowflake_plus', - ] - }, - { - type: 'category', - label: 'Features', - items: [ - 'plus/features/data-access', - { - type: 'category', - label: 'Project', - link: { - type: 'doc', - id: 'plus/features/project/index', - }, - items: [ - 'plus/features/project/overview', - 'plus/features/project/source-configuration', - 'plus/features/project/python-api', - ] - }, - { - type: 'category', - label: 'Local transformations', - link: { - type: 'doc', - id: 'plus/features/transformations/index', - }, - items: [ - 'plus/features/transformations/setup', - 'plus/features/transformations/dbt-transformations', - 'plus/features/transformations/python-transformations', - ] - }, - 'plus/features/ai', - { - type: 'category', - label: 'Data quality & tests', - items: [ - 'plus/features/quality/tests', - 'plus/features/quality/data-quality', - ] - }, - ] - }, - { - type: 'category', - label: 'Going to production', - items: [ - 'plus/production/pipeline-runner', - 'plus/production/observability', - ] - }, - 'plus/reference', - ] - }, { type: 'category', label: 'Sources', @@ -337,7 +247,6 @@ const sidebars = { slug: 'dlt-ecosystem/llm-tooling', }, items: [ - "dlt-ecosystem/llm-tooling/mcp-server", "dlt-ecosystem/llm-tooling/llm-native-workflow", ] }, @@ -561,11 +470,107 @@ const sidebars = { ] } */ - ] + ], + hubSidebar: [ + { + type: 'category', + label: 'dltHub Features', + link: { + type: 'doc', + id: 'hub/intro', + }, + items: [ + { + type: 'category', + label: 'Getting started', + items: [ + 'hub/getting-started/installation', + 'hub/getting-started/tutorial', + 'hub/getting-started/advanced_tutorial', + ] + }, + { + type: 'category', + label: 'Core concepts', + items: [ + 'hub/core-concepts/project', + 'hub/core-concepts/cache', + 'hub/core-concepts/datasets', + 'hub/core-concepts/profiles', + ] + }, + { + type: 'category', + label: 'Sources & Destinations', + items: [ + 'hub/ecosystem/ms-sql', + 'hub/ecosystem/iceberg', + 'hub/ecosystem/delta', + 'hub/ecosystem/snowflake_plus', + ] + }, + { + type: 'category', + label: 'Features', + items: [ + { + type: 'category', + label: 'Transformations', + link: { + type: 'doc', + id: 
'hub/features/transformations/index', + }, + items: [ + 'hub/features/transformations/index', + ] + }, + 'hub/features/data-access', + { + type: 'category', + label: 'Project', + link: { + type: 'doc', + id: 'hub/features/project/index', + }, + items: [ + 'hub/features/project/overview', + 'hub/features/project/source-configuration', + 'hub/features/project/python-api', + ] + }, + { + type: 'category', + label: 'dbt generator', + link: { + type: 'doc', + id: 'hub/features/transformations/index', + }, + items: [ + 'hub/features/transformations/setup', + 'hub/features/transformations/dbt-transformations', + ] + }, + 'hub/features/ai', + "hub/features/mcp-server", + ] + }, + { + type: 'category', + label: 'Going to production', + items: [ + 'hub/production/pipeline-runner', + 'hub/production/observability', + ] + }, + 'hub/reference', + 'hub/EULA' + ] + }, + ], }; // insert examples -for (const item of sidebars.tutorialSidebar) { +for (const item of sidebars.docsSidebar) { if (item.label === 'Code examples') { for (let examplePath of walkSync("./docs_processed/examples")) { examplePath = examplePath.replace("docs_processed/", ""); @@ -579,7 +584,7 @@ for (const item of sidebars.tutorialSidebar) { // inject api reference if it exists if (fs.existsSync('./docs_processed/api_reference/sidebar.json')) { - for (const item of sidebars.tutorialSidebar) { + for (const item of sidebars.docsSidebar) { if (item.label === 'Reference') { item.items.splice(0,0,require("./docs_processed/api_reference/sidebar.json")); } diff --git a/docs/website/src/css/custom.css b/docs/website/src/css/custom.css index 63b6153df..6bb31670c 100644 --- a/docs/website/src/css/custom.css +++ b/docs/website/src/css/custom.css @@ -411,7 +411,7 @@ html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(3)>div>a::before { /* dlt+ */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(4)>div>a::before { +/* .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(4)>div>a::before { background-image: url(../../static/img/Plus-Inactive.svg); } @@ -427,147 +427,147 @@ html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(4) html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(4)>div>a::before, html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(4)>div>a::before { background-image: url(../../static/img/Plus-Active-1.svg); -} +} */ /* Sources */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(5)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(4)>div>a::before { background-image: url(../../static/img/Sources-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(5)>div>a::before, -.menu_vPEQ>ul>li:nth-child(5)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(4)>div>a::before, +.menu_vPEQ>ul>li:nth-child(4)>div>a::before { background-image: url(../../static/img/Sources-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(5)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(4)>div>a::before { background-image: url(../../static/img/Sources-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(5)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(5)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(4)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(4)>div>a::before { background-image: url(../../static/img/Sources-Active-1.svg); } /* Destinations */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(6)>div>a::before { 
+.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(5)>div>a::before { background-image: url(../../static/img/Destinations-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(6)>div>a::before, -.menu_vPEQ>ul>li:nth-child(6)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(5)>div>a::before, +.menu_vPEQ>ul>li:nth-child(5)>div>a::before { background-image: url(../../static/img/Destinations-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(6)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(5)>div>a::before { background-image: url(../../static/img/Destinations-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(6)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(6)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(5)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(5)>div>a::before { background-image: url(../../static/img/Destinations-Active-1.svg); } /* Using dlt */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(7)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(6)>div>a::before { background-image: url(../../static/img/GeneralUsage-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(7)>div>a::before, -.menu_vPEQ>ul>li:nth-child(7)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(6)>div>a::before, +.menu_vPEQ>ul>li:nth-child(6)>div>a::before { background-image: url(../../static/img/GeneralUsage-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(7)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(6)>div>a::before { background-image: url(../../static/img/GeneralUsage-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(7)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(7)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(6)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(6)>div>a::before { background-image: url(../../static/img/GeneralUsage-Active-1.svg); } /* Deploying dlt */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(8)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(7)>div>a::before { background-image: url(../../static/img/UsingLoadedData-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(8)>div>a::before, -.menu_vPEQ>ul>li:nth-child(8)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(7)>div>a::before, +.menu_vPEQ>ul>li:nth-child(7)>div>a::before { background-image: url(../../static/img/UsingLoadedData-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(8)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(7)>div>a::before { background-image: url(../../static/img/UsingLoadedData-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(8)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(8)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(7)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(7)>div>a::before { background-image: url(../../static/img/UsingLoadedData-Active-1.svg); } /* Optimizing dlt */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(9)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(8)>div>a::before { background-image: url(../../static/img/Installation-Inactive.svg); } 
-.menu_vPEQ>ul>li:hover:nth-child(9)>div>a::before, -.menu_vPEQ>ul>li:nth-child(9)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(8)>div>a::before, +.menu_vPEQ>ul>li:nth-child(8)>div>a::before { background-image: url(../../static/img/Installation-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(9)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(8)>div>a::before { background-image: url(../../static/img/Installation-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(9)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(9)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(8)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(8)>div>a::before { background-image: url(../../static/img/Installation-Active-1.svg); } /* Code Examples */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(10)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(9)>div>a::before { background-image: url(../../static/img/Howdltworks-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(10)>div>a::before, -.menu_vPEQ>ul>li:nth-child(10)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(9)>div>a::before, +.menu_vPEQ>ul>li:nth-child(9)>div>a::before { background-image: url(../../static/img/Howdltworks-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(10)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(9)>div>a::before { background-image: url(../../static/img/Howdltworks-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(10)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(10)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(9)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(9)>div>a::before { background-image: url(../../static/img/Howdltworks-Active-1.svg); } /* Reference */ -.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(11)>div>a::before { +.menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(10)>div>a::before { background-image: url(../../static/img/Reference-Inactive.svg); } -.menu_vPEQ>ul>li:hover:nth-child(11)>div>a::before, -.menu_vPEQ>ul>li:nth-child(11)>div>a::before { +.menu_vPEQ>ul>li:hover:nth-child(10)>div>a::before, +.menu_vPEQ>ul>li:nth-child(10)>div>a::before { background-image: url(../../static/img/Reference-Active.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(11)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li.menu__list-item--collapsed:nth-child(10)>div>a::before { background-image: url(../../static/img/Reference-Inactive-1.svg); } -html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(11)>div>a::before, -html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(11)>div>a::before { +html[data-theme='dark'] .menu_vPEQ>ul>li:hover:nth-child(10)>div>a::before, +html[data-theme='dark'] .menu_vPEQ>ul>li:nth-child(10)>div>a::before { background-image: url(../../static/img/Reference-Active-1.svg); } diff --git a/docs/website/src/theme/DltHubFeatureAdmonition.js b/docs/website/src/theme/DltHubFeatureAdmonition.js new file mode 100644 index 000000000..25853404d --- /dev/null +++ b/docs/website/src/theme/DltHubFeatureAdmonition.js @@ -0,0 +1,11 @@ +import Admonition from "@theme/Admonition"; + +export function DltHubFeatureAdmonition() { + return ( + dltHub}> +

+        This page is for a dltHub Feature, which requires a license. Join our early access program for a trial license.

+
+ ); +} diff --git a/docs/website/src/theme/DocBreadcrumbs/index.tsx b/docs/website/src/theme/DocBreadcrumbs/index.tsx index dacb63458..75dabe4ed 100644 --- a/docs/website/src/theme/DocBreadcrumbs/index.tsx +++ b/docs/website/src/theme/DocBreadcrumbs/index.tsx @@ -57,7 +57,7 @@ export default function DocBreadcrumbs(): ReactNode { const homePageRoute = useHomePageRoute(); const location = useLocation(); - const showPlus = location.pathname.includes("/plus/"); + const showPlus = location.pathname.includes("/hub/"); if (!breadcrumbs) { return null; @@ -88,7 +88,7 @@ export default function DocBreadcrumbs(): ReactNode { {showPlus && (
- dlt+ logo + dltHub logo
)} diff --git a/docs/website/src/theme/Heading/index.js b/docs/website/src/theme/Heading/index.js index f1838b5ce..16da0b77b 100644 --- a/docs/website/src/theme/Heading/index.js +++ b/docs/website/src/theme/Heading/index.js @@ -1,18 +1,18 @@ import React from "react"; import Heading from "@theme-original/Heading"; import { useLocation } from "@docusaurus/router"; -import { PlusAdmonition } from "../PlusAdmonition"; +import { DltHubFeatureAdmonition } from "../DltHubFeatureAdmonition"; export default function HeadingWrapper(props) { const location = useLocation(); - const showPlus = location.pathname.includes("/plus/"); + const showHub = location.pathname.includes("/hub/"); const { as } = props; - if (as === "h1" && showPlus) { + if (as === "h1" && showHub) { return ( <> - + ); } diff --git a/docs/website/src/theme/PlusAdmonition/index.js b/docs/website/src/theme/PlusAdmonition/index.js deleted file mode 100644 index 269d9dee6..000000000 --- a/docs/website/src/theme/PlusAdmonition/index.js +++ /dev/null @@ -1,11 +0,0 @@ -import Admonition from "@theme/Admonition"; - -export function PlusAdmonition() { - return ( - dlt+}> -

- This page is for dlt+, which requires a license. Join our early access program for a trial license. -

-
- ); -} diff --git a/docs/website/static/img/dlt+_logo.png b/docs/website/static/img/dlt+_logo.png deleted file mode 100644 index 013f380bd2aab53bbdcafd4c131e7b69a12dfa82..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1461 zcmV;m1xosfP)1bO=FtGTDn$CFeI!&LZrb5DL!bepy-SC!3TXRf(kxaA4H*E5UD|s zLVdS_B8vKug4NO*3zi1cti_s`OPfua&E95rW{xw#Z1ys{v$M&r6MKHJu=AZWGiT5L zn>pV(=Uafo;cz${_aWGnO9jWWO?^^uETZ~*yB2~P^28VCj>>_?55QO=Yh=XZ@yMq> zJJ|PXVg2VOroT-^RtG?+wqXnG-WB-am1CpVzxn)9L3QDUD}T*I=ayLZ(#FM`yYHmq zkB{yNhrGyImZ93xpbS=MKILEUw)M=8vzMfFQp-w{x7Tm=`zWpF`U1{b6{ zB%AW8qt{wTS^rluzlCv8$>2Vt>$4&H9qnj5{yK(Fe~JlI2^XYF6b&q+!k=psjj64! zZ*6YJUs*20i6D-ZCzPl)UJNLL8n+Ag^k^lD>)V(raPrDxt)Wo!-ocSzI-Rkd4JYEO zNg|P6bFx=Ylz1G2_lEXo?mCe7+la4Gx;*yYuW&hz#*o3I$DZwb=BY6yYQH)`C5qVW zJk*S$5|Fx$Rcn9GkfGP!xKd~jw0U3%Vn#SL-*Pw(k#SPIfg6mnbe5VCcX9gz=PyAv^am0!U0vuYWbG zW;_0mYfz8Bg^PhkE4A|JG~DB%!)QA9CDv>km`bh1;*mI#ksDc?`KnTtg#D9|gRZ1H z81tc?XcM7a(w-(X zeE2rL#`YlHe-9omQ(m?3R)Ww=ADuXBd>$+;7(Fp|e?n-tyA$L#;Dfr$;DWder>_%{nB&z}BPR-vrJXQu;kC^pfs)D{ zC$xT!Xwk$S;e5^n(S>VFQ^s{oy@e_K81_RP{SuXE(vL^oC7Sqn#RME2xmb zRj~LfzE@RjMjnO~sWq7CFZE5uuQA*0VTE&u?fT!IP-Pj1!{Kl^JDPt0S1qd4y`aLK P00000NkvXXu0mjf4obO~ diff --git a/docs/website/static/img/dlthub_logo.png b/docs/website/static/img/dlthub_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..254a7e7b573d31b869f29bdc55d3d4e1b7c6414e GIT binary patch literal 2054 zcmeAS@N?(olHy`uVBq!ia0y~yV73ObKXb4F$%Qo>*+7c3z$3Dlfq`2Xgc%uT&5-~K zG8PB9J29*~C-V}>VM%xNb!1@J*w6hZk(GggeYU5IV@L(#+goSzri98I|M=fnR(RUv z3g%KxmP?zQo67bummlSwu;aPV1nmz(QD&FsR#pK@o5V6wri zFJGpgPd7}rK3O^Y{SzM7>)YP%zabxWd)4IfitpXU^RIB;zE{7`+NUl1YGb0+n)>>m zZ&q75-u}&gpKtQXwXYnckN>>)=g*>w#}6;xTI*T5G3@-BtG|wJE?tzp)trCc)vt*w z=Ds@cG4SmAoNJ33_`iLq+`A_1b?KqDRY#Y8Hr3x{s>#66;E=@5z|f&m24s4SGU*Tw zCTsm__WrL)j*-q2({8(VjoYs=F?Yw-+W*givAp;E`MVkqnr$m$q$h8E^@uMo)V{WC zVV%ysTlF!UjkCYTmL{y(RsPfItysR!_1DK1r*92jW3}$~bhFnRtSA9NbEWJ*= zz-nt^_KLg?*&pJ^GS5!?{k^JmLGRZr%Z|C_b5_pUy0HJ}%q`}J^FA)md$vsdcxd^w z!-=H_jLjGr6eitZWMEM0lpQL}OJ82?71v+QymqUrbp7uo5r#f?zY>4$47{+r@W-mF z>;1dl{V}@s=;7x_vTLudVfeRrVOF4f>EZ~(x!2Bpx_5IWpU-sne^sRk5q15$Kc79E zboAR_@%8Ob*YfPmF8xz=UhZA+>aR&h0~r4Id(SS7SW{6FaQBYwyp6yxns|hdfkD9Y z4X|(+?99w4h7{>n8zL@7oo!nc$td9ahM~pUc;(ve^w*&uuQjKa)=D;7vpSfZzcE?{ z(7!A+$$f0GUXSN>II!r~597(blVK|IXYb2A>3*@Rjrn%}KHWOoyem|8N69`b%j*wH zEUZ?ko4!rHuk*k4ZdFM@9si!iyr!$eN-`{UrLVpIeJrf}*uuPU_GQ(dKX<=6@a^Bh zu&vud*9chsnY{Ps^7hjYH&}gef1$(wcP*3O+@EQC None: "command": "deploy", "secrets_format": "env", } + + +@pytest.mark.skipif(sys.stdin.isatty(), reason="stdin connected, test skipped") +def test_no_tty() -> None: + with fmt.maybe_no_stdin(): + assert fmt.confirm("test", default=True) is True + assert fmt.prompt("test prompt", ("y", "n"), default="y") == "y" diff --git a/tests/cli/test_deploy_command.py b/tests/cli/test_deploy_command.py index 3d39e6823..4e3ef7744 100644 --- a/tests/cli/test_deploy_command.py +++ b/tests/cli/test_deploy_command.py @@ -8,13 +8,12 @@ from git import InvalidGitRepositoryError, NoSuchPathError import pytest import dlt - from dlt.common.runners import Venv from dlt.common.storages.file_storage import FileStorage from dlt.common.typing import StrAny from dlt.common.utils import set_working_dir -from dlt.cli import deploy_command, _dlt, echo +from dlt.cli import deploy_command, echo, command_wrappers from dlt.cli.exceptions import CliCommandInnerException from dlt.pipeline.exceptions import CannotRestorePipelineException from dlt.cli.deploy_command_helpers import get_schedule_description @@ -49,7 
+48,7 @@ def test_deploy_command_no_repo( # test wrapper with pytest.raises(CliCommandException) as ex: - _dlt.deploy_command_wrapper( + command_wrappers.deploy_command_wrapper( "debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, @@ -83,7 +82,7 @@ def test_deploy_command( ) assert "Your current repository has no origin set" in py_ex.value.args[0] with pytest.raises(CliCommandInnerException): - _dlt.deploy_command_wrapper( + command_wrappers.deploy_command_wrapper( "debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, @@ -100,7 +99,7 @@ def test_deploy_command( **deployment_args, ) with pytest.raises(CliCommandException) as ex: - _dlt.deploy_command_wrapper( + command_wrappers.deploy_command_wrapper( "debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, @@ -124,7 +123,7 @@ def test_deploy_command( ) assert "The last pipeline run ended with error" in py_ex2.value.args[0] with pytest.raises(CliCommandException) as ex: - _dlt.deploy_command_wrapper( + command_wrappers.deploy_command_wrapper( "debug_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, @@ -176,7 +175,7 @@ def test_deploy_command( ) with echo.always_choose(False, always_choose_value=True): with pytest.raises(CliCommandException) as ex: - _dlt.deploy_command_wrapper( + command_wrappers.deploy_command_wrapper( "no_pipeline.py", deployment_method, deploy_command.COMMAND_DEPLOY_REPO_LOCATION, diff --git a/tests/common/runtime/test_telemetry.py b/tests/common/runtime/test_telemetry.py index bb2973a1b..a06e1e866 100644 --- a/tests/common/runtime/test_telemetry.py +++ b/tests/common/runtime/test_telemetry.py @@ -182,6 +182,17 @@ def test_track_anon_event( assert context["run_context"] == "dlt" +def test_forced_anon_tracker() -> None: + from dlt.common.runtime import anon_tracker + + assert anon_tracker._ANON_TRACKER_ENDPOINT is None + + with anon_tracker.always_track(): + assert anon_tracker._ANON_TRACKER_ENDPOINT is not None + + assert anon_tracker._ANON_TRACKER_ENDPOINT is None + + def test_execution_context_with_plugin() -> None: import sys diff --git a/tests/plus/__init__.py b/tests/hub/__init__.py similarity index 50% rename from tests/plus/__init__.py rename to tests/hub/__init__.py index 323abb98e..ac0c0792b 100644 --- a/tests/plus/__init__.py +++ b/tests/hub/__init__.py @@ -1,4 +1,4 @@ """ -A few basic tests that guard against the worst regeressions between dlt and dlt+ +A few basic tests that guard against the worst regressions between dlt and dlthub features plugin dlt-plus needs to be installed to run these tests, a license is not required at this point. 
""" diff --git a/tests/plus/test_cli.py b/tests/hub/test_cli.py similarity index 100% rename from tests/plus/test_cli.py rename to tests/hub/test_cli.py diff --git a/tests/plus/test_destinations.py b/tests/hub/test_destinations.py similarity index 100% rename from tests/plus/test_destinations.py rename to tests/hub/test_destinations.py diff --git a/tests/plus/test_sources.py b/tests/hub/test_sources.py similarity index 100% rename from tests/plus/test_sources.py rename to tests/hub/test_sources.py diff --git a/tests/hub/test_transformations.py b/tests/hub/test_transformations.py new file mode 100644 index 000000000..09acfefbb --- /dev/null +++ b/tests/hub/test_transformations.py @@ -0,0 +1,14 @@ +import dlt + + +def test_transformation_decorator() -> None: + # current version blocks declarations with licenses + # @dlt.hub.transformation + # def get_even_rows(dataset: dlt.Dataset): + # return dataset.table("items").filter("id % 2 = 0") + + # # get instance without license + # transformation = get_even_rows(dlt.dataset("duckdb", "mock_dataset")) + # assert transformation.name == "get_even_rows" + + assert dlt.hub.transformation is not None