mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
docs - improves hub docs (#3282)
* adds tools to generate api reference for workspace * writes install, mcp, api reference and improves other docs in hub * Apply suggestions from code review Co-authored-by: Violetta Mishechkina <sansiositres@gmail.com> * fixes free tier --------- Co-authored-by: Violetta Mishechkina <sansiositres@gmail.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import argparse
|
||||
import textwrap
|
||||
import os
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
import dlt._workspace.cli.echo as fmt
|
||||
|
||||
@@ -48,7 +49,10 @@ class _WidthFormatter(argparse.RawTextHelpFormatter):
|
||||
def render_argparse_markdown(
|
||||
name: str,
|
||||
parser: argparse.ArgumentParser,
|
||||
/,
|
||||
*,
|
||||
header: str = HEADER,
|
||||
commands: List[str] = None,
|
||||
) -> str:
|
||||
def get_parser_help_recursive(
|
||||
parser: argparse.ArgumentParser,
|
||||
@@ -56,6 +60,7 @@ def render_argparse_markdown(
|
||||
parent: str = "",
|
||||
nesting: int = 0,
|
||||
help_string: str = None,
|
||||
commands: List[str] = None,
|
||||
) -> str:
|
||||
markdown = ""
|
||||
|
||||
@@ -145,13 +150,18 @@ def render_argparse_markdown(
|
||||
f"Missing helpstring for argument '{arg_title}' in section '{header}' of"
|
||||
f" command '{cmd}'."
|
||||
)
|
||||
|
||||
quoted_title = ""
|
||||
if is_subcommands_list:
|
||||
full_command = f"{cmd} {arg_title}"
|
||||
anchor_slug = full_command.lower().replace(" ", "-")
|
||||
arg_title = f"[`{arg_title}`](#{anchor_slug})"
|
||||
# skip unwanted commands
|
||||
if not commands or arg_title in commands:
|
||||
full_command = f"{cmd} {arg_title}"
|
||||
anchor_slug = full_command.lower().replace(" ", "-")
|
||||
quoted_title = f"[`{arg_title}`](#{anchor_slug})"
|
||||
else:
|
||||
arg_title = f"`{arg_title}`"
|
||||
section += f"* {arg_title} - {arg_help.capitalize()}\n"
|
||||
quoted_title = f"`{arg_title}`"
|
||||
if quoted_title:
|
||||
section += f"* {quoted_title} - {arg_help.capitalize()}\n"
|
||||
|
||||
extracted_sections.append({"header": header.capitalize(), "section": section})
|
||||
|
||||
@@ -181,10 +191,15 @@ def render_argparse_markdown(
|
||||
markdown += "</details>\n\n"
|
||||
|
||||
# traverse the subparsers and forward help strings to the recursive function
|
||||
commands_found = []
|
||||
for action in parser._actions:
|
||||
if isinstance(action, argparse._SubParsersAction):
|
||||
for subaction in action._get_subactions():
|
||||
subparser = action._name_parser_map[subaction.dest]
|
||||
# skip unwanted commands
|
||||
if commands and subaction.dest not in commands:
|
||||
continue
|
||||
commands_found.append(subaction.dest)
|
||||
assert (
|
||||
subaction.help
|
||||
), f"Subparser help string of {subaction.dest} is empty, please provide one."
|
||||
@@ -194,9 +209,17 @@ def render_argparse_markdown(
|
||||
parent=cmd,
|
||||
nesting=nesting + 1,
|
||||
help_string=subaction.help,
|
||||
commands=None,
|
||||
)
|
||||
if commands and set(commands_found) != set(commands):
|
||||
raise RuntimeError(
|
||||
f"Following commands were expected:: {commands} but found only {commands_found}."
|
||||
" Typically this means that dlthub was not installed or workspace context was "
|
||||
" not found."
|
||||
)
|
||||
|
||||
return markdown
|
||||
|
||||
markdown = get_parser_help_recursive(parser, name)
|
||||
markdown = get_parser_help_recursive(parser, name, commands=commands)
|
||||
|
||||
return header + markdown
|
||||
|
||||
@@ -692,6 +692,13 @@ If you are reading this on the docs website, you are looking at the rendered ver
|
||||
|
||||
self.parser.add_argument("file_name", nargs=1, help="Output file name")
|
||||
|
||||
self.parser.add_argument(
|
||||
"--commands",
|
||||
nargs="*",
|
||||
help="List of command names to render (optional)",
|
||||
default=None,
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--compare",
|
||||
default=False,
|
||||
@@ -705,7 +712,7 @@ If you are reading this on the docs website, you are looking at the rendered ver
|
||||
|
||||
parser, _ = _create_parser()
|
||||
|
||||
result = render_argparse_markdown("dlt", parser)
|
||||
result = render_argparse_markdown("dlt", parser, commands=args.commands)
|
||||
|
||||
if args.compare:
|
||||
with open(args.file_name[0], "r", encoding="utf-8") as f:
|
||||
@@ -769,7 +776,8 @@ class WorkspaceCommand(SupportsCliCommand):
|
||||
command = "workspace"
|
||||
help_string = "Manage current Workspace"
|
||||
description = """
|
||||
Commands to get info, cleanup local files and launch Workspace MCP
|
||||
Commands to get info, cleanup local files and launch Workspace MCP. Run without command get
|
||||
workspace info.
|
||||
"""
|
||||
|
||||
def configure_parser(self, parser: argparse.ArgumentParser) -> None:
|
||||
|
||||
0
docs/tools/dlthub_cli/.dlt/.workspace
Normal file
0
docs/tools/dlthub_cli/.dlt/.workspace
Normal file
11
docs/tools/dlthub_cli/Makefile
Normal file
11
docs/tools/dlthub_cli/Makefile
Normal file
@@ -0,0 +1,11 @@
|
||||
.PHONY: install-dlthub, update-cli-docs, check-cli-docs
|
||||
# this must be run from `dlthub_cli` to see workspace commands
|
||||
|
||||
install-dlthub:
|
||||
uv pip install dlthub
|
||||
|
||||
update-cli-docs: install-dlthub
|
||||
uv run dlt --debug render-docs ../../website/docs/hub/command-line-interface.md --commands license workspace profile
|
||||
|
||||
check-cli-docs: install-dlthub
|
||||
uv run dlt --debug render-docs ../../website/docs/hub/command-line-interface.md --compare --commands license workspace profile
|
||||
398
docs/website/docs/hub/command-line-interface.md
Normal file
398
docs/website/docs/hub/command-line-interface.md
Normal file
@@ -0,0 +1,398 @@
|
||||
---
|
||||
title: Command Line Interface
|
||||
description: Command line interface (CLI) full reference of dlt
|
||||
keywords: [command line interface, cli, dlt init]
|
||||
---
|
||||
|
||||
|
||||
# Command line interface reference
|
||||
|
||||
<!-- this page is fully generated from the argparse object of dlt, run make update-cli-docs to update it -->
|
||||
|
||||
This page contains all commands available in the dltHub CLI and is generated
|
||||
automatically from the fully populated Python argparse object of dlt.
|
||||
:::note
|
||||
Flags and positional commands are inherited from the parent command. Position within the command string
|
||||
is important. For example, if you want to enable debug mode on the pipeline command, you need to add the
|
||||
debug flag to the base dlt command:
|
||||
|
||||
```sh
|
||||
dlt --debug pipeline
|
||||
```
|
||||
|
||||
Adding the flag after the pipeline keyword will not work.
|
||||
:::
|
||||
|
||||
## `dlt`
|
||||
|
||||
Creates, adds, inspects and deploys dlt pipelines. Further help is available at https://dlthub.com/docs/reference/command-line-interface.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt [-h] [--version] [--disable-telemetry] [--enable-telemetry]
|
||||
[--non-interactive] [--debug] [--no-pwd]
|
||||
{pipeline,workspace,telemetry,schema,profile,init,render-docs,deploy,dashboard,ai,transformation,source,project,license,destination,dbt,dataset,cache}
|
||||
...
|
||||
```
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
* `--version` - Show program's version number and exit
|
||||
* `--disable-telemetry` - Disables telemetry before command is executed
|
||||
* `--enable-telemetry` - Enables telemetry before command is executed
|
||||
* `--non-interactive` - Non interactive mode. default choices are automatically made for confirmations and prompts.
|
||||
* `--debug` - Displays full stack traces on exceptions. useful for debugging if the output is not clear enough.
|
||||
* `--no-pwd` - Do not add current working directory to sys.path. by default $pwd is added to reproduce python behavior when running scripts.
|
||||
|
||||
**Available subcommands**
|
||||
* [`workspace`](#dlt-workspace) - Manage current workspace
|
||||
* [`profile`](#dlt-profile) - Manage workspace built-in profiles
|
||||
* [`license`](#dlt-license) - View dlthub license status
|
||||
|
||||
</details>
|
||||
|
||||
## `dlt workspace`
|
||||
|
||||
Manage current Workspace.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt workspace [-h] {clean,info,mcp,show} ...
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Commands to get info, clean up local files, and launch the Workspace MCP. Run without a command to get
|
||||
workspace info.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt`](#dlt).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
**Available subcommands**
|
||||
* [`clean`](#dlt-workspace-clean) - Cleans local data for the selected profile. locally loaded data will be deleted. pipelines working directories are also deleted by default. data in remote destinations is not affected.
|
||||
* [`info`](#dlt-workspace-info) - Displays workspace info.
|
||||
* [`mcp`](#dlt-workspace-mcp) - Launch dlt mcp server in current python environment and workspace in sse transport mode by default.
|
||||
* [`show`](#dlt-workspace-show) - Shows workspace dashboard for the pipelines and data in this workspace.
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt workspace clean`
|
||||
|
||||
Cleans local data for the selected profile. Locally loaded data will be deleted. Pipelines working directories are also deleted by default. Data in remote destinations is not affected.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt workspace clean [-h] [--skip-data-dir]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Cleans local data for the selected profile. Locally loaded data will be deleted. Pipelines working directories are also deleted by default. Data in remote destinations is not affected.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt workspace`](#dlt-workspace).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
* `--skip-data-dir` - Do not delete pipelines working dir.
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt workspace info`
|
||||
|
||||
Displays workspace info.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt workspace info [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Displays workspace info.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt workspace`](#dlt-workspace).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt workspace mcp`
|
||||
|
||||
Launch dlt MCP server in current Python environment and Workspace in SSE transport mode by default.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt workspace mcp [-h] [--stdio] [--port PORT]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
This MCP allows to attach to any pipeline that was previously ran in this workspace and then facilitates schema and data exploration in the pipeline's dataset.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt workspace`](#dlt-workspace).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
* `--stdio` - Use stdio transport mode
|
||||
* `--port PORT` - Sse port to use (default: 43654)
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt workspace show`
|
||||
|
||||
Shows Workspace Dashboard for the pipelines and data in this workspace.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt workspace show [-h] [--edit]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Shows Workspace Dashboard for the pipelines and data in this workspace.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt workspace`](#dlt-workspace).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
* `--edit` - Eject dashboard and start editable version
|
||||
|
||||
</details>
|
||||
|
||||
## `dlt profile`
|
||||
|
||||
Manage Workspace built-in profiles.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt profile [-h] [profile_name] {info,list,pin} ...
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Commands to list and pin profiles.
|
||||
Run without arguments to list all profiles, the default profile and the
|
||||
pinned profile in current project.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt`](#dlt).
|
||||
|
||||
**Positional arguments**
|
||||
* `profile_name` - Name of the profile
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
**Available subcommands**
|
||||
* [`info`](#dlt-profile-info) - Show information about the current profile.
|
||||
* [`list`](#dlt-profile-list) - Show list of built-in profiles.
|
||||
* [`pin`](#dlt-profile-pin) - Pin a profile to the workspace.
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt profile info`
|
||||
|
||||
Show information about the current profile.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt profile [profile_name] info [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Show information about the current profile.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt profile`](#dlt-profile).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt profile list`
|
||||
|
||||
Show list of built-in profiles.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt profile [profile_name] list [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Show list of built-in profiles.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt profile`](#dlt-profile).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt profile pin`
|
||||
|
||||
Pin a profile to the Workspace.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt profile [profile_name] pin [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Pin a profile to the Workspace, this will be the new default profile while it is pinned.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt profile`](#dlt-profile).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
## `dlt license`
|
||||
|
||||
View dlthub license status.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt license [-h] {info,scopes,issue} ...
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
View dlthub license status.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt`](#dlt).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
**Available subcommands**
|
||||
* [`info`](#dlt-license-info) - Show the installed license
|
||||
* [`scopes`](#dlt-license-scopes) - Show available scopes
|
||||
* [`issue`](#dlt-license-issue) - Issues a self-signed trial license that may be used for development, testing and for ci ops.
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt license info`
|
||||
|
||||
Show the installed license.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt license info [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Show the installed license.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt license`](#dlt-license).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt license scopes`
|
||||
|
||||
Show available scopes.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt license scopes [-h]
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Show available scopes.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt license`](#dlt-license).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
### `dlt license issue`
|
||||
|
||||
Issues a self-signed trial license that may be used for development, testing and for ci ops.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt license issue [-h] scope
|
||||
```
|
||||
|
||||
**Description**
|
||||
|
||||
Issue a new self-signed trial license.
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Show Arguments and Options</summary>
|
||||
|
||||
Inherits arguments from [`dlt license`](#dlt-license).
|
||||
|
||||
**Positional arguments**
|
||||
* `scope` - Scope of the license, a comma separated list of the scopes: ['dlthub.dbt_generator', 'dlthub.sources.mssql', 'dlthub.project', 'dlthub.transformation', 'dlthub.destinations.iceberg', 'dlthub.destinations.snowflake_plus', 'dlthub.runner']
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
|
||||
</details>
|
||||
|
||||
@@ -48,7 +48,7 @@ Currently, a cache usage has specific constraints. Please keep the following lim
|
||||
You can configure input tables in the cache to specify which tables are cached locally. This allows you to run SQL queries on remote data lakes efficiently, eliminating complex data retrieval workflows.
|
||||
Outputs define how processed data in the cache is pushed back to a chosen destination.
|
||||
|
||||
[Populating](../reference.md#dlt-cache-populate) and [flushing](../reference.md#dlt-cache-flush) the cache are discrete steps.
|
||||
Populating and flushing the cache are discrete steps.
|
||||
You can orchestrate these as part of your deployment or trigger them interactively using the cli, especially when analyzing data locally or working in a notebook.
|
||||
|
||||
## Why you should use it
|
||||
|
||||
@@ -24,32 +24,19 @@ Profiles let you:
|
||||
|
||||
## Enable the workspace and profiles
|
||||
|
||||
:::info
|
||||
Currently, some Workspace features (including Profiles) are experimental and disabled by default.
|
||||
They are hidden behind a feature flag, which means you need to manually enable them before use.
|
||||
Before you start, make sure that you followed [installation instructions](../getting-started/installation.md) and enabled [additional Workspace features](../getting-started/installation.md#enable-dlthub-free-tier-features) (which also include Profiles)
|
||||
|
||||
To activate these features, create the `.dlt/.workspace` file in your project directory; this tells `dlt` to switch from the classic project mode to the new Workspace mode.
|
||||
:::
|
||||
**dltHub Workspace** is a unified environment for developing, running, and maintaining data pipelines — from local development to production.
|
||||
|
||||
Profiles are part of the [dltHub Workspace](../workspace/overview.md) feature.
|
||||
To use them, first install `dlt` with Workspace support:
|
||||
[More about dlt Workspace ->](../workspace/overview.md)
|
||||
|
||||
```sh
|
||||
pip install "dlt[workspace]"
|
||||
```
|
||||
[Initialize](../workspace/init) a project:
|
||||
|
||||
```sh
|
||||
dlt init dlthub:pokemon_api duckdb
|
||||
```
|
||||
|
||||
Then, enable the Workspace feature by creating a flag file:
|
||||
|
||||
```sh
|
||||
touch .dlt/.workspace
|
||||
```
|
||||
|
||||
Once enabled, the Workspace automatically activates **profile support** and adds new commands such as:
|
||||
Once initialized, the Workspace automatically activates **profile support** and adds new commands such as:
|
||||
|
||||
```sh
|
||||
dlt profile
|
||||
|
||||
@@ -6,6 +6,10 @@ keywords: [delta, delta lake]
|
||||
|
||||
# Delta
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
The Delta destination is based on the [filesystem destination](../../dlt-ecosystem/destinations/filesystem.md) in dlt. All configuration options from the filesystem destination can be configured as well.
|
||||
|
||||
:::warning
|
||||
|
||||
@@ -6,6 +6,10 @@ keywords: [Iceberg, pyiceberg]
|
||||
|
||||
# Iceberg
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
Apache Iceberg is an open table format designed for high-performance analytics on large datasets. It supports ACID transactions, schema evolution, and time travel.
|
||||
|
||||
The Iceberg destination in dlt allows you to load data into Iceberg tables using the [pyiceberg](https://py.iceberg.apache.org/) library. It supports multiple catalog types and both local and cloud storage backends.
|
||||
|
||||
@@ -6,6 +6,10 @@ keywords: [MSSQL, CDC, Change Tracking, MSSQL replication]
|
||||
|
||||
# MS SQL replication
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
dltHub provides a comprehensive solution for syncing an MS SQL Server table using [Change Tracking](https://learn.microsoft.com/en-us/sql/relational-databases/track-changes/about-change-tracking-sql-server), a solution similar to CDC. By leveraging SQL Server's native Change Tracking feature, you can efficiently load incremental data changes — including inserts, updates, and deletes — into your destination.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
@@ -6,6 +6,10 @@ keywords: [Snowflake, Iceberg, destination]
|
||||
|
||||
# Snowflake+ Iceberg / Open Catalog
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
Snowflake+ is a drop-in replacement for [OSS Snowflake destination](../../dlt-ecosystem/destinations/snowflake.md) that adds [Apache Iceberg tables](https://docs.snowflake.com/en/user-guide/tables-iceberg) creation and related features.
|
||||
|
||||
It uses Snowflake to manage Iceberg data - tables are created and data is copied via Snowflake SQL and automatically visible in Snowflake (HORIZON)
|
||||
|
||||
@@ -4,20 +4,30 @@ description: Install the dlt MCP with your preferred LLM-enabled IDE.
|
||||
keywords: [mcp, llm, agents, ai]
|
||||
---
|
||||
|
||||
# Workspace MCP Server - current status
|
||||
## Overview
|
||||
|
||||
The server can do the following:
|
||||
- list pipelines in workspace
|
||||
- inspect table schema and data for dataset in particular pipeline
|
||||
- do sql queries
|
||||
dltHub Workspace comes with an MCP server you can run locally and integrate with your preferred IDE. It provides a set of tools for interacting with pipelines and datasets:
|
||||
- Explore and describe pipeline schemas
|
||||
- Access and explore data in destination tables
|
||||
- Write SQL queries, models, and transformations
|
||||
- Combining all the above, it provides efficient help with writing reports, notebook code, and dlt pipelines themselves
|
||||
|
||||
It is the same server that is called **the open-source `dlt`** in the documentation below.
|
||||
🚧 (in development) We are adding a set of tools that help drill down into pipeline run traces to find possible problems and root causes for incidents.
|
||||
|
||||
Since all mcp clients work with `sse` transport, it is the default when running the server. Before we were struggling with
|
||||
launching `mcp` as a part of client process. There was no way to pass right Python virtual environment and dlt run context.
|
||||
There were also issues with `stdio` pollution from `print` statement (overall that was IMO a dead end, mcp is a server by nature.)
|
||||
The MCP server can be started in:
|
||||
- Workspace context, where it will see all the pipelines in it
|
||||
- Pipeline context, where it is attached to a single pipeline
|
||||
|
||||
To launch the server in workspace context:
|
||||
Users can start as many MCP servers as necessary. The default configurations and examples below assume that workspace and pipeline MCP servers can work side by side.
|
||||
|
||||
## Launch MCP server
|
||||
|
||||
Since all MCP clients work with `sse` transport now, it is the default when running the server. The MCP will attach to the workspace context
|
||||
and pipelines in it. It must be able to start in the same Python environment and see the same workspace as `dlt` when running pipelines.
|
||||
There were also issues with `stdio` pollution from `print` statements—one misbehaving dependency could break the transport channel.
|
||||
TL;DR: We still support `stdio` transport, but it is not recommended.
|
||||
|
||||
To launch the server in **workspace context**:
|
||||
```sh
|
||||
dlt workspace mcp
|
||||
|
||||
@@ -26,10 +36,9 @@ INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://127.0.0.1:43654 (Press CTRL+C to quit)
|
||||
```
|
||||
Workspace mcp server has **43654** as default port and is configured without any path (ie `/sse`) so user can just copy the link above in the right
|
||||
client.
|
||||
The workspace MCP server has **43654** as the default port and is configured without any path (i.e., `/sse`), so users can just copy the link above into the appropriate client.
|
||||
|
||||
To launch the server in pipeline context:
|
||||
To launch the server in **pipeline context**:
|
||||
```sh
|
||||
dlt pipeline fruitshop mcp
|
||||
|
||||
@@ -40,12 +49,12 @@ INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://127.0.0.1:43656 (Press CTRL+C to quit)
|
||||
|
||||
```
|
||||
Pipeline mcp server has **43656** as default port. Pipeline is already attached when mcp server starts. Both pipeline and workspace mcps can work side by side.
|
||||
The pipeline MCP server has **43656** as the default port. The pipeline is already attached when the MCP server starts. Both pipeline and workspace MCP servers can work side by side.
|
||||
|
||||
|
||||
Example client configurations
|
||||
### Configure MCP server
|
||||
|
||||
Cursor, Cline
|
||||
#### Cursor, Cline, Claude Desktop
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
@@ -59,7 +68,8 @@ Cursor, Cline
|
||||
}
|
||||
```
|
||||
|
||||
Continue: for some reason it does see mcp configuration created in dev container. Maybe someone will make it work...
|
||||
### Continue (local)
|
||||
|
||||
```yaml
|
||||
name: dlt mcps
|
||||
version: 0.0.1
|
||||
@@ -70,9 +80,17 @@ mcpServers:
|
||||
url: "http://localhost:43654"
|
||||
```
|
||||
|
||||
## Configuration
|
||||
Server can still be started with `stdio` transport and different port using the command line. The plan is to allow to configure
|
||||
mcp deeply via dlt configuration system.
|
||||
### Continue Hub
|
||||
|
||||
With Continue, you can use [Continue Hub](https://docs.continue.dev/hub/introduction) for a 1-click install of the MCP, or a local config file. Select `Agent Mode` to enable the MCP server.
|
||||
|
||||
See the [dltHub page](https://hub.continue.dev/dlthub) and select the `dlt` or `dltHub` Assistant. This bundles the MCP with additional Continue-specific features.
|
||||
|
||||
## Configure MCP server
|
||||
The server can still be started with `stdio` transport and a different port using the command line.
|
||||
|
||||
🚧 The feature below is in development and not yet available:
|
||||
The plan is to allow full configuration of MCP via the dlt configuration system.
|
||||
|
||||
```toml
|
||||
[workspace.mcp]
|
||||
@@ -85,273 +103,16 @@ port=888
|
||||
transport="stdio"
|
||||
```
|
||||
|
||||
## Interactions with Runtime
|
||||
This is a heads-up on how we host mcps on runtime. To be deleted.
|
||||
|
||||
* deployed workspace dashboard has two routes `/app` to see the notebook and `/mcp` to connect to mcp server
|
||||
* workspace dashboard in single pipeline mode `/app/fruitshop` and `/mcp/fruitshop`
|
||||
* I'm also pondering exposing some kind of mcp attached to each marimo notebook
|
||||
|
||||
|
||||
# Project MCP server
|
||||
|
||||
This is our "project" mcp (**integrates with `dltHub` features** below) and can be launched with:
|
||||
```sh
|
||||
dlt project mcp
|
||||
```
|
||||
It uses port **43655** by default, and the project context is obtained before launching the server.
|
||||
|
||||
|
||||
|
||||
|
||||
# MCP Server
|
||||
|
||||
Currently, dltHub is [building two MCP servers](https://dlthub.com/blog/deep-dive-assistants-mcp-continue) that you can run locally and integrate with your preferred IDE. One server is for the open-source `dlt` library and the other integrates with `dltHub` features ([Learn more](ai.md)).
|
||||
|
||||
This page gives an overview of what we're building and includes detailed instructions to install the MCP in your favorite IDE.
|
||||
|
||||
:::warning
|
||||
🚧 This feature is under development, and the interface may change in future releases. Interested in becoming an early tester? [Join dltHub early access](https://info.dlthub.com/waiting-list).
|
||||
:::
|
||||
|
||||
## Model Context Protocol
|
||||
The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP) is a standard initiated by Anthropic to connect large language models (LLMs) to external data and systems.
|
||||
|
||||
In the context of the MCP, the **client** is built-in the user-facing application. The most common clients are LLM-enabled IDEs or extensions such as Continue, Cursor, Claude Desktop, Cline, etc. The **server** is a process that handles requests to interact with external data and systems.
|
||||
In the context of MCP, the **client** is built into the user-facing application. The most common clients are LLM-enabled IDEs or extensions such as Continue, Cursor, Claude Desktop, Cline, etc. The **server** is a process that handles requests to interact with external data and systems.
|
||||
|
||||
### Core constructs
|
||||
|
||||
- **Resources** are data objects that can be retrieved by the client and added to the context (i.e., prompt) of the LLM request. Resources will be manually selected by the user, or certain clients will automatically retrieved them.
|
||||
- **Resources** are data objects that can be retrieved by the client and added to the context (i.e., prompt) of the LLM request. Resources will be manually selected by the user, or certain clients will automatically retrieve them.
|
||||
|
||||
- **Tools** provided a way to execute code and provide information to the LLM. Tools are called by the LLM; they can't be selected by the user or the client.
|
||||
- **Tools** provide a way to execute code and provide information to the LLM. Tools are called by the LLM; they can't be selected by the user or the client.
|
||||
|
||||
- **Prompts** are strings, or templated strings, that can be injected in the conversation. Prompts are selected by the user. They provide shortcuts for frequent commands, or allow to ask the LLMs to use specific tools.
|
||||
|
||||
|
||||
:::note
|
||||
The MCP is progressively being adopted and not all clients support all the features used by the dlt MCP server. See [this page](https://modelcontextprotocol.io/clients) for an overview of client capabilities.
|
||||
:::
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
The dlt and dltHub MCP servers aim to be a toolbox to help developers build, maintain, and operate `dlt` pipelines. There are two primary avenues:
|
||||
|
||||
- **Code generation**: LLMs are good at writing Python code, but they don't know everything about `dlt`. The MCP provides resources and tools to provide up-to-date information to the LLM about the dlt library and the specifics of your project.
|
||||
|
||||
- **Assistance**: The MCP resources, tools, and prompts can also be used to learn more about `dlt`. The built-in knowledgebase combined with your project's context brings Q&A support inside your IDE.
|
||||
|
||||
The next sections are a non-exhaustive documentation of existing and upcoming features.
|
||||
|
||||
### Tools
|
||||
|
||||
- Pipeline metadata: read your pipeline directory (default: `~/.dlt/pipelines`) to know available pipelines, available tables, table schemas.
|
||||
|
||||
- Operational metadata: read your pipeline directory to identify last load date, schema changes, load errors, and inspect load packages.
|
||||
|
||||
- dlt configuration: use an instantiated pipeline to inspect the dlt configuration (sources, resources, destinations).
|
||||
|
||||
- Datasets: connect to the destination and execute SQL queries to retrieve data tables via light text-to-SQL.
|
||||
|
||||
|
||||
### Resources
|
||||
|
||||
- LLM-optimized dlt documentation pages. These can be selected and added to your prompt to help the LLM generate valid `dlt` code.
|
||||
|
||||
### Prompts
|
||||
|
||||
- Tutorial-like instructions that put the LLM in "assistant mode". For example, the LLM can ask you questions about your data and "think" with you to select the right loading strategy.
|
||||
|
||||
- Command-like instructions that give the LLM a task to complete. For example, the LLM can initialize a new pipeline. This is akin to a conversational command line tool.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
The `dlt` and `dltHub` MCP servers are intended to run locally on your machine and communicate over standard I/O. Typically, the MCP server process is launched by the MCP client, i.e., the IDE. We will use the [uv package manager](https://docs.astral.sh/uv/#installation) to launch the MCP server.
|
||||
|
||||
The next sections include client-specific instructions, references, and snippets to configure the MCP server. They are mainly derived from this `uv` command:
|
||||
|
||||
```sh
|
||||
uv tool run --with "dlthub[mcp]==0.9.0" dlt mcp run
|
||||
```
|
||||
|
||||
To explain each part:
|
||||
- [uv tool run](https://docs.astral.sh/uv/guides/tools/) executes the command in an isolated virtual environment
|
||||
- `--with PACKAGE_NAME` specifies the Python dependencies for the command that follows
|
||||
- `dlthub[mcp]` ensures to get all the extra dependencies for the MCP server
|
||||
- `dlthub==0.9.0` pins a specific `dlthub` version (where the MCP code lives). We suggest at least pinning the `dlthub` version to provide a consistent experience
|
||||
- `dlt mcp run` is a CLI command found in dltHub that starts the dlt MCP server. Use `dlt mcp run_plus` to start the `dltHub` MCP server instead
|
||||
|
||||
Then, to enable the MCP server and tool usage, several IDEs require you to enable "tool/agent/mcp mode".
|
||||
|
||||
### dltHub MCP server
|
||||
|
||||
To run the `dltHub` MCP server, you will need to set your [dltHub License](../getting-started/installation#licensing) globally in `~/.dlt/secrets.toml` or in an environment variable (must be set before launching the IDE) and use `dlt mcp run_plus` in your configuration. If the `dltHub` license is missing, the dlt MCP server will be launched instead. You can tell the two apart by the tools, resources, and prompts available.
|
||||
|
||||
|
||||
### Continue
|
||||
|
||||
With Continue, you can use [Continue Hub](https://docs.continue.dev/hub/introduction) for a 1-click install of the MCP, or a local config file. Select `Agent Mode` to enable the MCP server.
|
||||
|
||||
#### Continue Hub
|
||||
See the [dltHub page](https://hub.continue.dev/dlthub) and select the `dlt` or `dltHub` Assistants. This bundles the MCP with additional Continue-specific features. You can also select the `dlt` or `dltHub` MCP blocks to install the server exclusively.
|
||||
|
||||
#### Local
|
||||
You can define an assistant locally with the same YAML syntax as the Continue Hub by adding files to `$PROJECT_ROOT/.continue/assistants`. This snippet creates an assistant with the MCP only.
|
||||
|
||||
```yaml
|
||||
# local_dlt.yaml
|
||||
name: dlt MCP # can change
|
||||
version: 0.0.1 # can change
|
||||
schema: v1
|
||||
mcpServers:
|
||||
- name: dlt # can change
|
||||
command: uv
|
||||
args:
|
||||
- tool
|
||||
- run
|
||||
- --with
|
||||
- dlthub[mcp]==0.9.0
|
||||
- dlt
|
||||
- mcp
|
||||
- run
|
||||
```
|
||||
|
||||
There's also a global configuration spec in JSON:
|
||||
```json
|
||||
{
|
||||
"experimental": {
|
||||
"modelContextProtocolServers": [
|
||||
{
|
||||
"transport": {
|
||||
"type": "stdio",
|
||||
"command": "uv",
|
||||
"args": [
|
||||
"tool",
|
||||
"run",
|
||||
"--with",
|
||||
"dlthub[mcp]==0.9.0",
|
||||
"dlt",
|
||||
"mcp",
|
||||
"run"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Claude Desktop
|
||||
|
||||
You need to [add a JSON configuration file](https://modelcontextprotocol.io/quickstart/user#2-add-the-filesystem-mcp-server) on your system. See our [full Claude Desktop tutorial](ai.md)
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"dlt": {
|
||||
"command": "uv",
|
||||
"args": [
|
||||
"tool",
|
||||
"run",
|
||||
"--with",
|
||||
"dlthub[mcp]==0.9.0",
|
||||
"dlt",
|
||||
"mcp",
|
||||
"run"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Cursor
|
||||
|
||||
Select **Agent Mode** to enable the MCP server. The [configuration](https://docs.cursor.com/context/model-context-protocol#configuring-mcp-servers) follows the same JSON specs as Claude Desktop
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"dlt": {
|
||||
"command": "uv",
|
||||
"args": [
|
||||
"tool",
|
||||
"run",
|
||||
"--with",
|
||||
"dlthub[mcp]==0.9.0",
|
||||
"dlt",
|
||||
"mcp",
|
||||
"run"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Cline
|
||||
|
||||
Follow [this tutorial](https://docs.cline.bot/mcp-servers/mcp-quickstart) to use the IDE's menu to add MCP servers.
|
||||
|
||||
### Manual server launch (advanced)
|
||||
|
||||
The following methods allow the user to manually launch the MCP server from their preferred directory and Python environment before connecting to the IDE. The basic installation methods let the IDE launch the server over STDIO in isolation from the current project directory and Python environment.
|
||||
|
||||
We won't use `uv tool run` because we want to use the local Python environment instead of an isolated one. We assume that `dlthub[mcp]` is installed in the environment, giving access to the `dlt mcp` command.
|
||||
|
||||
:::warning
|
||||
🚧 The MCP, IDE features, and the dlt MCP server are all rapidly evolving and some details are likely to change.
|
||||
:::
|
||||
|
||||
#### SSE transport
|
||||
|
||||
To launch the server using [Server-Sent Events (SSE) transport](https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse), modify the CLI command to:
|
||||
|
||||
```sh
|
||||
dlt mcp run_plus --sse --port 43655
|
||||
```
|
||||
|
||||
Then, configure your IDE to connect to the local connection at `http://127.0.0.1:43655/sse` (the `/sse` path is important). Many IDEs don't currently support SSE connection, but [Cline does](https://docs.cline.bot/mcp-servers/configuring-mcp-servers#sse-transport).
|
||||
|
||||
#### Proxy mode
|
||||
|
||||
Since many IDEs don't currently support SSE, we use a workaround with two MCP servers using the [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy) library:
|
||||
|
||||
- the **proxy server** is launched by the IDE and communicates with the IDE over STDIO
|
||||
- the **dlt server** is launched by the user in the desired directory and environment and communicates with the **proxy server** over SSE
|
||||
|
||||
The benefit of this approach is that you will never need to update the IDE configuration; it always launches the same proxy server. Then, you can launch the dlt server from any context.
|
||||
|
||||
To launch the proxy server, follow the basic installation method (Cursor, Continue, Claude, etc.), but change the command to
|
||||
|
||||
```sh
|
||||
uv tool run mcp-proxy "http://127.0.0.1:43655/sse"
|
||||
```
|
||||
|
||||
For example, Cursor would use this config.
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"dlt": {
|
||||
"command": "uv",
|
||||
"args": [
|
||||
"tool",
|
||||
"run",
|
||||
"mcp-proxy",
|
||||
"http://127.0.0.1:43655/sse"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
To launch the dlt server, use this command to start communication over SSE. The `--port` value should match the IDE config.
|
||||
|
||||
```sh
|
||||
dlt mcp run_plus --sse --port 43655
|
||||
```
|
||||
|
||||
:::warning
|
||||
The **proxy server** typically fails at startup if the **dlt server** is not already running. After launching the **dlt server**, use the "reconnect" button in the IDE to have the proxy connect to it.
|
||||
:::
|
||||
- **Prompts** are strings, or templated strings, that can be injected into the conversation. Prompts are selected by the user. They provide shortcuts for frequent commands or allow asking the LLM to use specific tools.
|
||||
|
||||
@@ -5,6 +5,10 @@ description: Define dltHub Projects in YAML
|
||||
|
||||
# Project
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
|
||||
<img src="https://storage.googleapis.com/dlt-blog-images/plus/dlt_plus_projects.png" width="500"/>
|
||||
|
||||
|
||||
@@ -5,6 +5,10 @@ description: Python interface of dltHub Project
|
||||
|
||||
# Python API to interact with dltHub Project
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
You can access any [dltHub Project](../../core-concepts/project.md) entity or function via the Python interface.
|
||||
The current module provides access to various parts of your active dltHub Project.
|
||||
|
||||
|
||||
@@ -5,6 +5,10 @@ description: How to setup sources in YAML file
|
||||
|
||||
# Source configuration
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
|
||||
The `dlt.yml` file enables a fully declarative setup of your data source and its parameters. It supports built-in sources such as REST APIs, SQL databases, and cloud storage, as well as any custom source you define.
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
---
|
||||
title: dbt generator
|
||||
title: dbt model generator
|
||||
description: Generate dbt models automatically
|
||||
---
|
||||
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
The **dbt generator** creates scaffolding for dbt projects using data ingested by dlt. It analyzes the pipeline schema and automatically generates staging and fact dbt models. By integrating with dlt-configured destinations, it automates code creation and supports incremental loading, ensuring that only new records are processed in both the ingestion and transformation layers.
|
||||
|
||||
The dbt generator can be used as part of the local transformations feature as well as a standalone tool, enabling you to generate dbt models for any dlt pipeline.
|
||||
|
||||
@@ -5,16 +5,9 @@ keywords: [transformation, dataset, sql, pipeline, ibis, arrow]
|
||||
---
|
||||
# Transformations: Reshape data after loading
|
||||
|
||||
import Admonition from "@theme/Admonition";
|
||||
import { DltHubFeatureAdmonition } from '@theme/DltHubFeatureAdmonition';
|
||||
|
||||
<Admonition type="note" title={<span style={{ textTransform: "lowercase" }}>dltHub</span>}>
|
||||
<p>
|
||||
Transformations are part of **dltHub**. This module is currently available in 🧪 preview to selected users and projects.
|
||||
Contact us to get your [trial license](../../EULA.md)
|
||||
<br/>
|
||||
<em>[Copyright © 2025 dltHub Inc. All rights reserved.](../../EULA.md)</em>
|
||||
</p>
|
||||
</Admonition>
|
||||
<DltHubFeatureAdmonition />
|
||||
|
||||
`dlt transformations` let you build new tables or full datasets from datasets that have _already_ been ingested with `dlt`. `dlt transformations` are written and run in a very similar fashion to dlt source and resources. `dlt transformations` require you to have loaded data to a location, for example a local duckdb database, a bucket or a warehouse on which the transformations may be executed. `dlt transformations` are fully supported for all of our sql destinations including all filesystem and bucket formats.
|
||||
|
||||
|
||||
@@ -96,9 +96,9 @@ Now you can update the generated transformations and create new ones to reflect
|
||||
|
||||
## Run transformations
|
||||
|
||||
dltHub offers comprehensive CLI support for executing transformations. You can find the full list of available commands in the [command line interface](../../reference.md).
|
||||
dltHub offers comprehensive CLI support for executing transformations. You can find the full list of available commands in the command line interface.
|
||||
|
||||
To run the defined transformation, use the [following command](../../reference.md#dlt-transformation-run):
|
||||
To run the defined transformation, use the following command:
|
||||
|
||||
```sh
|
||||
dlt transformation <transformation_name> run
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
title: Installation
|
||||
description: Installation information of dlthub package
|
||||
description: Installation information for the dlthub package
|
||||
---
|
||||
|
||||
:::info Supported Python versions
|
||||
@@ -11,165 +11,126 @@ dltHub currently supports Python versions 3.9-3.13.
|
||||
|
||||
## Quickstart
|
||||
|
||||
To install the `dlt[workspace]` package, run:
|
||||
To install the `dlt[workspace]` package, create a new [Python virtual environment](#setting-up-your-environment) and run:
|
||||
```sh
|
||||
uv pip install "dlt[workspace]"
|
||||
```
|
||||
This will install `dlt` with several additional dependencies you'll need for local development: `arrow`, `marimo`, `mcp`, and a few others.
|
||||
If you need to install `uv` (a modern package manager), [please refer to the next section](#configuration-of-the-python-environment).
|
||||
|
||||
### Enable dltHub Free tier features
|
||||
|
||||
:::info
|
||||
The most recent [dltHub Free tier features](../intro.md#tiers--licensing) like profiles are hidden behind a feature flag,
|
||||
which means you need to manually enable them before use.
|
||||
|
||||
To activate these features, create an empty `.dlt/.workspace` file in your project directory; this tells `dlt` to switch from the classic project mode to the Workspace mode.
|
||||
|
||||
<Tabs values={[{"label": "Ubuntu", "value": "ubuntu"}, {"label": "macOS", "value": "macos"}, {"label": "Windows", "value": "windows"}]} groupId="operating-systems" defaultValue="ubuntu">
|
||||
<TabItem value="ubuntu">
|
||||
|
||||
```sh
|
||||
pip install "dlt[workspace]"
|
||||
mkdir -p .dlt && touch .dlt/.workspace
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="macos">
|
||||
|
||||
This comes with the dlthub package. To use the features of dlthub, please get a valid license key before proceeding, as described under [licensing](#self-licensing).
|
||||
```sh
|
||||
mkdir -p .dlt && touch .dlt/.workspace
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="windows">
|
||||
|
||||
```sh
|
||||
mkdir .dlt
|
||||
type nul > .dlt\.workspace
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
:::
|
||||
|
||||
### Enable features that require a license
|
||||
|
||||
Licensed features come with a commercial Python `dlthub` package:
|
||||
|
||||
```sh
|
||||
uv pip install -U dlthub
|
||||
```
|
||||
|
||||
Please install a valid license before proceeding, as described under [licensing](#self-licensing).
|
||||
|
||||
## Setting up your environment
|
||||
|
||||
### Configuration of the Python environment
|
||||
|
||||
Check if your Python environment is configured:
|
||||
In this documentation, we use `uv` (a modern package manager) to install Python versions, manage virtual environments, and manage project dependencies.
|
||||
To install `uv`, you can use `pip` or follow [the OS-specific installation instructions](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
Once you have `uv` installed you can pick any Python version supported by it:
|
||||
|
||||
```sh
|
||||
python --version
|
||||
pip --version
|
||||
uv python install 3.13
|
||||
```
|
||||
|
||||
If you have a different Python version installed or are missing pip, follow the instructions below to update your Python version and/or install `pip`.
|
||||
|
||||
<Tabs values={[{"label": "Ubuntu", "value": "ubuntu"}, {"label": "macOS", "value": "macos"}, {"label": "Windows", "value": "windows"}]} groupId="operating-systems" defaultValue="ubuntu">
|
||||
<TabItem value="ubuntu">
|
||||
|
||||
You can install Python 3.10 with `apt`.
|
||||
|
||||
```sh
|
||||
sudo apt update
|
||||
sudo apt install python3.10
|
||||
pip install uv
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="macos">
|
||||
|
||||
On macOS, you can use [Homebrew](https://brew.sh) to install Python 3.10.
|
||||
|
||||
```sh
|
||||
brew update
|
||||
brew install python@3.10
|
||||
pip install uv
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="windows">
|
||||
|
||||
After installing [Python 3.10 (64-bit version) for Windows](https://www.python.org/downloads/windows/), you can install `pip`.
|
||||
|
||||
```sh
|
||||
C:\> pip3 install -U pip
|
||||
C:\> pip3 install uv
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
or use any Python version you have installed on your system.
|
||||
|
||||
### Virtual environment
|
||||
|
||||
We recommend working within a [virtual environment](https://docs.python.org/3/library/venv.html) when creating Python projects.
|
||||
This way, all the dependencies for your current project will be isolated from packages in other projects.
|
||||
|
||||
<Tabs values={[{"label": "Ubuntu", "value": "ubuntu"}, {"label": "macOS", "value": "macos"}, {"label": "Windows", "value": "windows"}]} groupId="operating-systems" defaultValue="ubuntu">
|
||||
|
||||
<TabItem value="ubuntu">
|
||||
|
||||
Create a new virtual environment in your working folder. This will create a `./venv` directory where your virtual environment will be stored:
|
||||
This way, all the dependencies for your current project will be isolated from packages in other projects. With `uv`, run:
|
||||
```sh
|
||||
uv venv
|
||||
```
|
||||
This will create a virtual environment in the `.venv` folder using the default system Python version.
|
||||
|
||||
```sh
|
||||
uv venv --python 3.10
|
||||
uv venv --python 3.13
|
||||
```
|
||||
This will use `Python 3.13` for your virtual environment.
|
||||
|
||||
Activate the virtual environment:
|
||||
|
||||
Activate the virtual environment using the instructions displayed by `uv`, i.e.:
|
||||
|
||||
```sh
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="macos">
|
||||
|
||||
Create a new virtual environment in your working folder. This will create a `./venv` directory where your virtual environment will be stored:
|
||||
|
||||
```sh
|
||||
uv venv --python 3.10
|
||||
```
|
||||
|
||||
Activate the virtual environment:
|
||||
|
||||
```sh
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="windows">
|
||||
|
||||
Create a new virtual environment in your working folder. This will create a `./venv` directory where your virtual environment will be stored:
|
||||
|
||||
```bat
|
||||
C:\> uv venv --python 3.10
|
||||
```
|
||||
|
||||
Activate the virtual environment:
|
||||
|
||||
```bat
|
||||
C:\> .\venv\Scripts\activate
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
### Install dlthub
|
||||
|
||||
`dlthub` will be automatically installed with the `workspace` extra:
|
||||
|
||||
```sh
|
||||
# install the newest dlt[workspace] version or upgrade the existing version to the newest one
|
||||
uv pip install -U "dlt[workspace]"
|
||||
```
|
||||
|
||||
Please install a valid license before proceeding, as described under [licensing](#self-licensing).
|
||||
|
||||
|
||||
|
||||
## Licensing
|
||||
|
||||
To access dltHub’s paid features, such as Iceberg support or Python-based transformations, you need a dltHub Software License.
|
||||
|
||||
|
||||
When you purchase a paid dltHub offering, the required license will be issued and managed automatically for your account.
|
||||
|
||||
You can also manually configure a license for local development or CI environments as shown below.
|
||||
1. [Contact us](https://info.dlthub.com/waiting-list) if you want to purchase a license or get a trial license with unlimited use.
|
||||
2. Issue a [limited trial license](#self-licensing) yourself.
|
||||
|
||||
|
||||
#### Applying your license
|
||||
#### Install your license
|
||||
|
||||
You can provide your license key in one of two ways:
|
||||
If you've received your license from us, you can install it in one of two ways:
|
||||
|
||||
In the `secrets.toml` file:
|
||||
```toml
|
||||
license = "your-dlthub-license-key"
|
||||
```
|
||||
|
||||
As an environment variable
|
||||
As an environment variable:
|
||||
```sh
|
||||
export DLT_LICENSE_KEY="your-dlthub-license-key"
|
||||
```
|
||||
|
||||
#### Features requiring a license:
|
||||
|
||||
- [@dlt.hub.transformation](../features/transformations/index.md) - powerful Python decorator to build transformation pipelines and notebooks
|
||||
- [dbt transformations](../features/transformations/dbt-transformations.md): a staging layer for data transformations, combining a local cache with schema enforcement, debugging tools, and integration with existing data workflows.
|
||||
- [Iceberg support](../ecosystem/iceberg.md)
|
||||
- [Secure data access and sharing](../features/data-access.md)
|
||||
- [AI workflows](../features/ai.md): agents to augment your data engineering team.
|
||||
- [@dlt.hub.transformation](../features/transformations/index.md) - a powerful Python decorator to build transformation pipelines and notebooks
|
||||
- [dbt transformations](../features/transformations/dbt-transformations.md) - a staging layer for data transformations, combining a local cache with schema enforcement, debugging tools, and integration with existing data workflows.
|
||||
- [Iceberg support](../ecosystem/iceberg.md).
|
||||
- [MSSQL Change Tracking source](../ecosystem/ms-sql.md).
|
||||
|
||||
For more information about the feature scopes, see [Scopes](#scopes).
|
||||
Please also review our End User License Agreement [(EULA)](../EULA.md)
|
||||
Please also review our [End User License Agreement](../EULA.md).
|
||||
|
||||
### Self-licensing
|
||||
|
||||
@@ -184,21 +145,23 @@ Choose a scope for the feature you want to test, then issue a license with:
|
||||
```sh
|
||||
dlt license issue <scope>
|
||||
```
|
||||
This command will:
|
||||
* Print your license key in the cli output
|
||||
* License key will be put into your toml file
|
||||
|
||||
#### Scopes:
|
||||
for example:
|
||||
```sh
|
||||
dlt license issue dlthub.transformation
|
||||
```
|
||||
This will do the following:
|
||||
* Issue a new license (or merge with existing scopes) for the [transformations](../features/transformations/index.md) feature.
|
||||
* Print your license key in the CLI output.
|
||||
* Put the license key into your `toml` file.
|
||||
|
||||
* `*`: All features
|
||||
* `dlthub`: All dltHub features
|
||||
* `dlthub.dbt_generator`: [Generate dbt packages from dlt pipelines](../features/transformations/dbt-transformations)
|
||||
* `dlthub.sources.mssql`: [Change tracking for MSSQL](../ecosystem/ms-sql)
|
||||
* `dlthub.project`: [Declarative yaml interface for dlt](../features/project/)
|
||||
* `dlthub.transformation`: [Python-first query-agnostic data transformations](../features/transformations/)
|
||||
* `dlthub.destinations.iceberg`: [Iceberg destination with full catalog support](../ecosystem/iceberg)
|
||||
* `dlthub.destinations.snowflake_plus`: [Snowflake iceberg extension with Open Catalog](../ecosystem/snowflake_plus)
|
||||
* `dlthub.runner`: Production pipeline runner and orchestrator support
|
||||
#### Scopes
|
||||
|
||||
Display available scopes by running the following command:
|
||||
|
||||
```sh
|
||||
dlt license scopes
|
||||
```
|
||||
|
||||
You can self-issue multiple licenses; newly issued licenses will automatically include previously granted features.
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ To follow this tutorial, make sure:
|
||||
- you're familiar with the [core concepts of dlt](../../reference/explainers/how-dlt-works.md)
|
||||
|
||||
:::tip
|
||||
You can find the full list of available cli commands under [cli reference](../reference.md)
|
||||
You can find the full list of available cli commands under cli reference
|
||||
:::
|
||||
|
||||
## Creating a new dltHub Project
|
||||
@@ -46,7 +46,7 @@ This command generates a project named `tutorial` with:
|
||||
- one dataset on the DuckDB destination
|
||||
|
||||
:::warning
|
||||
Currently, `dlt project init` only supports a limited number of sources (for example, [REST API](../../dlt-ecosystem/verified-sources/rest_api/index.md), [SQL database](../../dlt-ecosystem/verified-sources/sql_database/index.md), [filesystem](../../dlt-ecosystem/verified-sources/filesystem/index.md), etc.). To list all available sources, please use the [cli command](../reference.md#dlt-source-list):
|
||||
Currently, `dlt project init` only supports a limited number of sources (for example, [REST API](../../dlt-ecosystem/verified-sources/rest_api/index.md), [SQL database](../../dlt-ecosystem/verified-sources/sql_database/index.md), [filesystem](../../dlt-ecosystem/verified-sources/filesystem/index.md), etc.). To list all available sources, please use the cli command:
|
||||
|
||||
```sh
|
||||
dlt source list-available
|
||||
@@ -133,7 +133,7 @@ Take a look at the [Projects context](../features/project/overview.md#project-co
|
||||
|
||||
### Inspecting the results
|
||||
|
||||
Use the [`dlt dataset` command](../reference.md#dlt-dataset) to interact with the dataset stored in the DuckDB destination. For example:
|
||||
Use the `dlt dataset` command to interact with the dataset stored in the DuckDB destination. For example:
|
||||
|
||||
### Counting the loaded rows
|
||||
To count rows in the dataset, run:
|
||||
@@ -187,7 +187,7 @@ dlt <entity_type> <entity_name> add
|
||||
```
|
||||
|
||||
Depending on the entity you are adding, different options are available.
|
||||
To explore all commands, refer to the [cli command reference](../reference.md). You can also use the `--help` option to see available settings for a specific entity. For example: `dlt destination add --help`. Let's individually add a source, destination, and pipeline to a new project, replicating the default project we created in the previous chapter.
|
||||
To explore all commands, refer to the cli command reference. You can also use the `--help` option to see available settings for a specific entity. For example: `dlt destination add --help`. Let's individually add a source, destination, and pipeline to a new project, replicating the default project we created in the previous chapter.
|
||||
|
||||
### Create an empty project
|
||||
|
||||
|
||||
@@ -61,10 +61,10 @@ dltHub embraces the dlt library, not replaces it:
|
||||
* dlt (OSS): Python library focused on extract & load with strong typing and schema handling.
|
||||
* dltHub: Adds transformations, quality, agentic tooling, managed runtime, and storage choices, so you can move from local dev to production seamlessly.
|
||||
|
||||
If you like the dlt developer experience, dltHub gives you everything around it to run in production with less toil.
|
||||
dltHub extends the dlt developer experience with new [local workspace layout](workspace/init.md), [configuration profiles](core-concepts/profiles-dlthub.md), [additional CLI commands](command-line-interface.md), workspace dashboard, [MCP server](features/mcp-server.md) and more.
|
||||
Those developer experience improvements belong to the **dltHub Free tier** and are distributed side by side with `dlt` under the [Apache 2.0 license](https://github.com/dlt-hub/dlt?tab=Apache-2.0-1-ov-file#readme). You can use the **dltHub Free tier** right away — just as you use regular `dlt`.
|
||||
|
||||
While dltHub is a paid product, we also offer a dltHub Free tier, distributed under the [Apache 2.0 license](https://github.com/dlt-hub/dlt?tab=Apache-2.0-1-ov-file#readme),
|
||||
which you can start using right away.
|
||||
All features that require a license are part of dltHub paid tiers (i.e. Basic tier) and are clearly marked as such in this documentation. Those features are shipped via `dlthub` Python package (available on [PyPI](https://pypi.org/project/dlthub/)) which is not open source and can be used with a valid license.
|
||||
|
||||
## dltHub products
|
||||
dltHub consists of three main products. You can use them together or compose them based on your needs.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -8,6 +8,8 @@ keywords: [create a pipeline, verified source, workspace, rest client, how to, d
|
||||
|
||||
This guide walks you through creating and initializing a `dlt` pipeline in dltHub Workspace — whether manually, with the help of an LLM, or from one of the **verified sources** maintained by the dltHub team.
|
||||
|
||||
|
||||
|
||||
## Overview
|
||||
|
||||
A `dlt` pipeline moves data from a source (like an API or database) into a destination (like DuckDB, Snowflake, or Iceberg). Initializing a pipeline is the first step in the data workflow.
|
||||
@@ -22,15 +24,9 @@ You can create one in three CLI-based ways:
|
||||
|
||||
## Step 0: Install dlt with workspace support
|
||||
|
||||
To use workspace functionality, install dlt with the workspace extra:
|
||||
Before you start, make sure that you have followed the [installation instructions](../getting-started/installation.md) and enabled the [additional Workspace features](../getting-started/installation.md#enable-dlthub-free-tier-features).
|
||||
|
||||
```sh
|
||||
pip install "dlt[workspace]"
|
||||
```
|
||||
|
||||
This adds support for AI-assisted workflows and the `dlt ai` command.
|
||||
|
||||
**dlt Workspace** is a unified environment for developing, running, and maintaining data pipelines — from local development to production.
|
||||
**dltHub Workspace** is a unified environment for developing, running, and maintaining data pipelines — from local development to production.
|
||||
|
||||
[More about dlt Workspace](../workspace/overview.md)
|
||||
|
||||
|
||||
@@ -29,15 +29,7 @@ For more information, please visit the [installation page](../getting-started/in
|
||||
|
||||
## CLI support
|
||||
|
||||
Workspace includes powerful CLI commands that make it easy to interact with your projects.
|
||||
Some of the available commands include:
|
||||
|
||||
* `dlt workspace show` will launch Workspace Dashboard
|
||||
* `dlt workspace mcp` will launch Workspace MCP (OSS MCP) in SSE mode.
|
||||
* `dlt pipeline foo mcp` will launch the pipeline MCP (old version of MCP, to be deprecated) in SSE mode.
|
||||
* `dlt pipeline foo show` will launch Workspace Dashboard and open pipeline `foo`
|
||||
|
||||
You can find more commands in the [CLI reference section](../reference.md).
|
||||
Workspace comes with additional [CLI support](../command-line-interface.md) that is enabled after installation.
|
||||
|
||||
## Next steps
|
||||
|
||||
|
||||
@@ -568,7 +568,7 @@ Renders markdown version of cli docs.
|
||||
|
||||
**Usage**
|
||||
```sh
|
||||
dlt render-docs [-h] [--compare] file_name
|
||||
dlt render-docs [-h] [--commands [COMMANDS ...]] [--compare] file_name
|
||||
```
|
||||
|
||||
**Description**
|
||||
@@ -587,6 +587,7 @@ Inherits arguments from [`dlt`](#dlt).
|
||||
|
||||
**Options**
|
||||
* `-h, --help` - Show this help message and exit
|
||||
* `--commands [COMMANDS ...]` - List of command names to render (optional)
|
||||
* `--compare` - Compare the changes and raise if output would be updated
|
||||
|
||||
</details>
|
||||
|
||||
@@ -548,7 +548,7 @@ const sidebars = {
|
||||
'hub/ecosystem/snowflake_plus',
|
||||
]
|
||||
},
|
||||
'hub/reference',
|
||||
'hub/command-line-interface',
|
||||
'hub/EULA',
|
||||
],
|
||||
};
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
import Admonition from "@theme/Admonition";
|
||||
import Link from "@docusaurus/Link";
|
||||
|
||||
export function DltHubFeatureAdmonition() {
|
||||
return (
|
||||
<Admonition type="note" title={<span>dltHub</span>}>
|
||||
<Admonition type="note" title={<span>dltHub Licensed Feature</span>}>
|
||||
<p>
|
||||
This page describes a dltHub feature, which requires a license. <a href="https://info.dlthub.com/waiting-list">Join our early access program</a> for a trial license.
|
||||
This feature requires <Link to="/docs/hub/getting-started/installation">installed <code>dlthub</code> package</Link> and an active <Link to="/docs/hub/getting-started/installation#licensing">license</Link>. You can <Link to="/docs/hub/getting-started/installation#self-licensing">self-issue a trial</Link> or <Link to="https://info.dlthub.com/waiting-list">join the waiting list</Link> for official access.
|
||||
<br/>
|
||||
<br/>
|
||||
<em><Link to="/docs/hub/EULA">Copyright © 2025 dltHub Inc. All rights reserved.</Link></em>
|
||||
</p>
|
||||
</Admonition>
|
||||
);
|
||||
|
||||
@@ -5,7 +5,7 @@ import { DltHubFeatureAdmonition } from "../DltHubFeatureAdmonition";
|
||||
|
||||
export default function HeadingWrapper(props) {
|
||||
const location = useLocation();
|
||||
const showHub = location.pathname.includes("/hub/");
|
||||
const showHub = false; //location.pathname.includes("/hub/");
|
||||
const { as } = props;
|
||||
|
||||
if (as === "h1" && showHub) {
|
||||
|
||||
Reference in New Issue
Block a user