diff --git a/docs/website/docs/dlt-ecosystem/llm-tooling/llm-native-workflow.md b/docs/website/docs/dlt-ecosystem/llm-tooling/llm-native-workflow.md
index b78bddf79..3f6051eb5 100644
--- a/docs/website/docs/dlt-ecosystem/llm-tooling/llm-native-workflow.md
+++ b/docs/website/docs/dlt-ecosystem/llm-tooling/llm-native-workflow.md
@@ -1,10 +1,10 @@
---
-title: LLM-native workflow
+title: Build pipelines and reports with LLMs
description: How to extract and explore data from REST API with AI editors/agents
keywords: [cursor, llm, restapi, ai]
---
-# LLM-native workflow
+# Build dlt pipelines and reports with LLMs
## Overview
@@ -13,7 +13,7 @@ This guide walks you through a collaborative AI-human workflow for extracting an
You will learn:
1. How to initialize a dltHub workspace for your source using dltHub’s [LLM-context database](https://dlthub.com/workspace).
2. How to build a REST API source in minutes with AI assistance.
-3. How to debug a pipeline and explore data using the pipeline dashboard.
+3. How to debug a pipeline and explore data using the workspace dashboard.
4. How to start a new notebook and work with the pipeline’s dataset in it.
## Prerequisites
@@ -76,7 +76,7 @@ pip install "dlt[workspace]"
### Initialize workspace
-dltHub provides prepared contexts for 1000+ sources, available at [https://dlthub.com/workspace](https://dlthub.com/workspace). To get started, search for your API and follow the tailored instructions.
+We provide LLM context for over 5,000 sources, available at [https://dlthub.com/workspace](https://dlthub.com/workspace). To get started, search for your API and follow the tailored instructions.

@@ -154,7 +154,7 @@ Load package 1749667187.541553 is LOADED and contains no failed jobs
If the pipeline fails, pass error messages to the LLM. Restart after 4-8 failed attempts.
:::
-### Validate with pipeline dashboard
+### Validate with workspace dashboard
Launch the dashboard to validate your pipeline:
@@ -188,7 +188,7 @@ import dlt
my_data = dlt.pipeline("{source}_pipeline").dataset()
# get any table as Pandas frame
-# my_data.{table_name}.df().head()
+my_data.table("table_name").df().head()
```
For more, see the [dataset access guide](../../general-usage/dataset-access).
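In a notebook, the same dataset object also supports column selection and chunked reads; a minimal sketch (the pipeline name and the `issues` table below are placeholders):

```py
import dlt

# placeholders: substitute your own pipeline and table names
dataset = dlt.pipeline("github_pipeline").dataset()
issues = dataset.table("issues")

# preview a few columns as a pandas data frame
print(issues.select("id", "title", "state").limit(20).df())

# for larger tables, iterate in chunks instead of loading everything at once
for chunk in issues.iter_df(chunk_size=1000):
    print(len(chunk))
```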
diff --git a/docs/website/docs/dlt-ecosystem/transformations/python.md b/docs/website/docs/dlt-ecosystem/transformations/python.md
index b1e2d031b..048924799 100644
--- a/docs/website/docs/dlt-ecosystem/transformations/python.md
+++ b/docs/website/docs/dlt-ecosystem/transformations/python.md
@@ -25,15 +25,20 @@ pipeline = dlt.pipeline(
dev_mode=True
)
-# get a dataframe of all reactions from the dataset
-reactions = pipeline.dataset().issues.select("reactions__+1", "reactions__-1", "reactions__laugh", "reactions__hooray", "reactions__rocket").df()
+# get a data frame of all reactions from the dataset
+github_issues = pipeline.dataset().table("issues")
+reactions = github_issues.select(
+ "reactions__+1", "reactions__-1", "reactions__laugh", "reactions__hooray", "reactions__rocket"
+).df()
# calculate and print out the sum of all reactions
counts = reactions.sum(0).sort_values(0, ascending=False)
print(counts)
# alternatively, you can fetch the data as an arrow table
-reactions = pipeline.dataset().issues.select("reactions__+1", "reactions__-1", "reactions__laugh", "reactions__hooray", "reactions__rocket").arrow()
+reactions = github_issues.select(
+ "reactions__+1", "reactions__-1", "reactions__laugh", "reactions__hooray", "reactions__rocket"
+).arrow()
# ... do transformations on the arrow table
```
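One way the Arrow-side step hinted at in the last comment could look, sketched with `pyarrow.compute` (assuming the selected reaction columns are numeric):

```py
import pyarrow.compute as pc

# sum each reaction column directly on the arrow table and sort the totals
totals = {name: pc.sum(reactions.column(name)).as_py() for name in reactions.column_names}
print(sorted(totals.items(), key=lambda kv: kv[1] or 0, reverse=True))
```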
@@ -55,7 +60,7 @@ pipeline = dlt.pipeline(
)
# get user relation with only a few columns selected, but omitting email and name
-users = pipeline.dataset().users.select("age", "amount_spent", "country")
+users = pipeline.dataset().table("users").select("age", "amount_spent", "country")
# load the data into a new table called users_clean in the same dataset
pipeline.run(users.iter_arrow(chunk_size=1000), table_name="users_clean")
@@ -79,7 +84,7 @@ pipeline = dlt.pipeline(
# NOTE: For selecting only users above 18, we could also use the filter method on the relation with ibis expressions
@dlt.resource(table_name="users_clean")
def users_clean():
- users = pipeline.dataset().users
+ users = pipeline.dataset().table("users")
for arrow_table in users.iter_arrow(chunk_size=1000):
# we want to filter out users under 18
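The hunk cuts off inside the loop body; a minimal sketch of how the filtering resource could be completed with `pyarrow.compute` (the `age` column comes from the example above, and the note in the doc also mentions an ibis-based alternative):

```py
import dlt
import pyarrow.compute as pc

@dlt.resource(table_name="users_clean")
def users_clean():
    users = pipeline.dataset().table("users")
    for arrow_table in users.iter_arrow(chunk_size=1000):
        # keep only users aged 18 or older and pass the chunk on
        yield arrow_table.filter(pc.greater_equal(arrow_table["age"], 18))
```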
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/openapi-generator.md b/docs/website/docs/dlt-ecosystem/verified-sources/openapi-generator.md
index a22675425..d8a51fc43 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/openapi-generator.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/openapi-generator.md
@@ -60,7 +60,7 @@ We will create a simple example pipeline from a [PokeAPI spec](https://pokeapi.c
dlt pipeline pokemon_pipeline info
```
-8. You can now also install marimo to see a preview of the data in the pipeline dashboard; you should have loaded 40 Pokemons and their details.
+8. You can now also install marimo to see a preview of the data in the workspace dashboard; you should have loaded 40 Pokemons and their details.
```sh
pip install pandas marimo
dlt pipeline pokemon_pipeline show
diff --git a/docs/website/docs/general-usage/dashboard.md b/docs/website/docs/general-usage/dashboard.md
index 0bb31dd3e..c17b7febe 100644
--- a/docs/website/docs/general-usage/dashboard.md
+++ b/docs/website/docs/general-usage/dashboard.md
@@ -1,15 +1,15 @@
---
-title: Inspect your pipeline with the pipeline dashboard
+title: Inspect your pipeline with the workspace dashboard
description: Open a comprehensive dashboard with information about your pipeline
keywords: [pipeline, schema, data, inspect]
---
-# Inspect your pipeline with the pipeline dashboard
+# Inspect your pipeline with the workspace dashboard
Once you have run a pipeline locally, you can launch a web app that displays detailed information about your pipeline. This app is built with the Marimo Python notebook framework. For this to work, you will need to have the `marimo` package installed.
:::tip
-The pipeline dashboard app works with all destinations that are supported by our dataset. Vector databases are generally unsupported at this point; however, you can still inspect metadata such as run traces, schemas, and pipeline state.
+The workspace dashboard app works with all destinations that are supported by our dataset. Vector databases are generally unsupported at this point; however, you can still inspect metadata such as run traces, schemas, and pipeline state.
:::
## Features
@@ -113,9 +113,9 @@ This provides an overview and detailed information about loads found in the _dlt

-## Creating your own pipeline dashboard
+## Creating your own workspace dashboard
-You can eject the code for the pipeline dashboard into your current working directory and start editing it to create a custom version that fits your needs. To do this, run the `show` command with the `--edit` flag:
+You can eject the code for the workspace dashboard into your current working directory and start editing it to create a custom version that fits your needs. To do this, run the `show` command with the `--edit` flag:
```sh
dlt pipeline {pipeline_name} show --edit
diff --git a/docs/website/docs/general-usage/dataset-access/dataset.md b/docs/website/docs/general-usage/dataset-access/dataset.md
index 3e04e112d..203cde23f 100644
--- a/docs/website/docs/general-usage/dataset-access/dataset.md
+++ b/docs/website/docs/general-usage/dataset-access/dataset.md
@@ -16,7 +16,7 @@ Here's a full example of how to retrieve data from a pipeline and load it into a
## Getting started
-Assuming you have a `Pipeline` object (let's call it `pipeline`), you can obtain a `Dataset` which is contains the crendentials and schema to your destination dataset. You can run construct a query and execute it on the dataset to retrieve a `Relation` which you may use to retrieve data from the `Dataset`.
+Assuming you have a `Pipeline` object (let's call it `pipeline`), you can obtain a `Dataset` which contains the credentials and schema of your destination dataset. You can construct a query and execute it on the dataset to retrieve a `Relation`, which you can use to fetch data from the `Dataset`.
**Note:** The `Dataset` and `Relation` objects are **lazy-loading**. They will only query and retrieve data when you perform an action that requires it, such as fetching data into a DataFrame or iterating over the data. This means that simply creating these objects does not load data into memory, making your code more efficient.
diff --git a/docs/website/docs/general-usage/dataset-access/dataset_snippets/dataset_snippets.py b/docs/website/docs/general-usage/dataset-access/dataset_snippets/dataset_snippets.py
index 8705a5846..542b1a337 100644
--- a/docs/website/docs/general-usage/dataset-access/dataset_snippets/dataset_snippets.py
+++ b/docs/website/docs/general-usage/dataset-access/dataset_snippets/dataset_snippets.py
@@ -42,7 +42,7 @@ def quick_start_example_snippet(pipeline: dlt.Pipeline) -> None:
dataset = pipeline.dataset()
# Step 2: Access a table as a ReadableRelation
- customers_relation = dataset.customers # Or dataset["customers"]
+ customers_relation = dataset.table("customers")
# Step 3: Fetch the entire table as a Pandas DataFrame
df = customers_relation.df() # or customers_relation.df(chunk_size=50)
@@ -64,8 +64,8 @@ def getting_started_snippet(pipeline: dlt.Pipeline) -> None:
def accessing_tables_snippet(dataset: dlt.Dataset) -> None:
# @@@DLT_SNIPPET_START accessing_tables
- # Using attribute access
- customers_relation = dataset.customers
+    # Using the `table` method
+ customers_relation = dataset.table("customers")
# Using item access
customers_relation = dataset["customers"]
@@ -73,7 +73,7 @@ def accessing_tables_snippet(dataset: dlt.Dataset) -> None:
def fetch_entire_table_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START fetch_entire_table_df
df = customers_relation.df()
@@ -89,7 +89,7 @@ def fetch_entire_table_snippet(dataset: dlt.Dataset) -> None:
def iterating_chunks_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START iterating_df_chunks
for df_chunk in customers_relation.iter_df(chunk_size=5):
# Process each DataFrame chunk
@@ -124,15 +124,15 @@ def context_manager_snippet(dataset: dlt.Dataset) -> None:
# the dataset context manager will keep the connection open
# and close it after the with block is exited
- with dataset as dataset_:
- print(dataset.customers.limit(50).arrow())
- print(dataset.purchases.arrow())
+ with dataset:
+ print(dataset.table("customers").limit(50).arrow())
+ print(dataset.table("purchases").arrow())
# @@@DLT_SNIPPET_END context_manager
def limiting_records_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START limiting_records
# Get the first 50 items as a PyArrow table
arrow_table = customers_relation.limit(50).arrow()
@@ -144,7 +144,7 @@ def limiting_records_snippet(dataset: dlt.Dataset) -> None:
def select_columns_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START select_columns
# Select only 'id' and 'name' columns
items_list = customers_relation.select("id", "name").fetchall()
@@ -158,7 +158,7 @@ def select_columns_snippet(dataset: dlt.Dataset) -> None:
def order_by_snippet(default_dataset: dlt.Dataset) -> None:
- customers_relation = default_dataset.customers
+ customers_relation = default_dataset.table("customers")
# @@@DLT_SNIPPET_START order_by
# Order by 'id'
ordered_list = customers_relation.order_by("id").fetchall()
@@ -166,7 +166,7 @@ def order_by_snippet(default_dataset: dlt.Dataset) -> None:
def filter_snippet(default_dataset: dlt.Dataset) -> None:
- customers_relation = default_dataset.customers
+ customers_relation = default_dataset.table("customers")
# @@@DLT_SNIPPET_START filter
# Filter by 'id'
filtered = customers_relation.where("id", "in", [3, 1, 7]).fetchall()
@@ -186,7 +186,7 @@ def filter_snippet(default_dataset: dlt.Dataset) -> None:
def aggregate_snippet(default_dataset: dlt.Dataset) -> None:
- customers_relation = default_dataset.customers
+ customers_relation = default_dataset.table("customers")
# @@@DLT_SNIPPET_START aggregate
# Get max 'id'
@@ -199,7 +199,7 @@ def aggregate_snippet(default_dataset: dlt.Dataset) -> None:
def chain_operations_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START chain_operations
# Select columns and limit the number of records
@@ -267,21 +267,21 @@ def ibis_expressions_snippet(pipeline: dlt.Pipeline) -> None:
def fetch_one_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START fetch_one
record = customers_relation.fetchone()
# @@@DLT_SNIPPET_END fetch_one
def fetch_many_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START fetch_many
records = customers_relation.fetchmany(10)
# @@@DLT_SNIPPET_END fetch_many
def iterating_with_limit_and_select_snippet(dataset: dlt.Dataset) -> None:
- customers_relation = dataset.customers
+ customers_relation = dataset.table("customers")
# @@@DLT_SNIPPET_START iterating_with_limit_and_select
# Dataframes
for df_chunk in customers_relation.select("id", "name").limit(100).iter_df(chunk_size=20): ...
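Because relations are lazy, the operations from the individual snippets compose into one chain; a sketch using the same `customers` table:

```py
# sketch: filter, order, project, and limit in a single lazy chain
top_customers = (
    dataset.table("customers")
    .where("id", "in", [1, 3, 7])
    .order_by("id")
    .select("id", "name")
    .limit(50)
)
print(top_customers.df())        # as a pandas data frame
print(top_customers.fetchall())  # or as plain tuples
```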
diff --git a/docs/website/docs/general-usage/dataset-access/streamlit.md b/docs/website/docs/general-usage/dataset-access/streamlit.md
index 3fe093277..0621ceaeb 100644
--- a/docs/website/docs/general-usage/dataset-access/streamlit.md
+++ b/docs/website/docs/general-usage/dataset-access/streamlit.md
@@ -13,7 +13,7 @@ The Streamlit app does not work with all destinations supported by `dlt`. Only d
:::
:::warning
-The Streamlit app is not under active development anymore and may soon be deprecated. We encourage all users to use the [pipeline dashboard](../dashboard.md)
+The Streamlit app is no longer under active development and may soon be deprecated. We encourage all users to use the [workspace dashboard](../dashboard.md) instead.
:::
## Prerequisites
diff --git a/docs/website/docs/hub/features/project/python-api.md b/docs/website/docs/hub/features/project/python-api.md
index c7784d6f0..292792fbf 100644
--- a/docs/website/docs/hub/features/project/python-api.md
+++ b/docs/website/docs/hub/features/project/python-api.md
@@ -126,7 +126,7 @@ dataset = dlt.hub.current.project.catalog().dataset("my_pipeline_dataset") # ty
# This function reads data in chunks from an existing table and yields each chunk
def transform_frames():
# Read the 'items' table in chunks of 1000 rows
- for df in dataset.items.iter_df(chunk_size=1000):
+ for df in dataset.table("items").iter_df(chunk_size=1000):
# You can process the data here if needed
yield df
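One way to consume this generator is to feed it to a regular `pipeline.run`, which accepts data frames as items; a minimal sketch (the pipeline name, destination, and target table are placeholders):

```py
import dlt

# placeholders: choose your own pipeline name, destination, and target table
copy_pipeline = dlt.pipeline(pipeline_name="items_copy_pipeline", destination="duckdb")
copy_pipeline.run(transform_frames(), table_name="items_transformed")
```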
diff --git a/docs/website/docs/hub/features/transformations/transformation-snippets.py b/docs/website/docs/hub/features/transformations/transformation-snippets.py
index 86b801e3c..6bcad7a8b 100644
--- a/docs/website/docs/hub/features/transformations/transformation-snippets.py
+++ b/docs/website/docs/hub/features/transformations/transformation-snippets.py
@@ -121,8 +121,8 @@ def multiple_transformation_instructions_snippet(fruitshop_pipeline: dlt.Pipelin
# this (probably nonsensical) transformation will create a union of the customers and purchases tables
@dlt.hub.transformation(write_disposition="append")
def union_of_tables(dataset: dlt.Dataset) -> Any:
- yield dataset.customers
- yield dataset.purchases
+        yield dataset.table("customers")
+        yield dataset.table("purchases")
# @@@DLT_SNIPPET_END multiple_transformation_instructions
@@ -210,7 +210,7 @@ def arrow_dataframe_operations_snippet(fruitshop_pipeline: dlt.Pipeline) -> None
@dlt.hub.transformation
def copied_customers(dataset: dlt.Dataset) -> Any:
# get full customers table as arrow table
- customers = dataset.customers.arrow()
+ customers = dataset.table("customers").arrow()
# Sort the table by 'name'
sorted_customers = customers.sort_by([("name", "ascending")])
@@ -222,8 +222,8 @@ def arrow_dataframe_operations_snippet(fruitshop_pipeline: dlt.Pipeline) -> None
@dlt.hub.transformation
def enriched_purchases(dataset: dlt.Dataset) -> Any:
        # get both full tables as dataframes
- purchases = dataset.purchases.df()
- customers = dataset.customers.df()
+ purchases = dataset.table("purchases").df()
+ customers = dataset.table("customers").df()
# Merge (JOIN) the DataFrames
result = purchases.merge(customers, left_on="customer_id", right_on="id")
diff --git a/docs/website/docs/hub/intro.md b/docs/website/docs/hub/intro.md
index d0c3f54d6..dfb29a44a 100644
--- a/docs/website/docs/hub/intro.md
+++ b/docs/website/docs/hub/intro.md
@@ -17,7 +17,7 @@ dltHub is built around the open-source library [dlt](../intro.md). It uses the s
dltHub supports both local and managed cloud development. A single developer can deploy and operate pipelines, transformations, and notebooks directly from a dltHub Workspace, using a single command.
-The dltHub Runtime, customizable pipeline dashboard, and validation tools make it straightforward to monitor, troubleshoot, and keep data reliable throughout the whole end-to-end data workflow:
+The dltHub Runtime, customizable workspace dashboard, and validation tools make it straightforward to monitor, troubleshoot, and keep data reliable throughout the whole end-to-end data workflow:
```mermaid
flowchart LR
diff --git a/docs/website/docs/intro.md b/docs/website/docs/intro.md
index 4d44c0867..f8efd1ce7 100644
--- a/docs/website/docs/intro.md
+++ b/docs/website/docs/intro.md
@@ -12,22 +12,26 @@ import snippets from '!!raw-loader!./intro-snippets.py';
## What is dlt?
-dlt is an open-source Python library that loads data from various, often messy data sources into well-structured, live datasets. It offers a lightweight interface for extracting data from [REST APIs](./tutorial/rest-api), [SQL databases](./tutorial/sql-database), [cloud storage](./tutorial/filesystem), [Python data structures](./tutorial/load-data-from-an-api), and [many more](./dlt-ecosystem/verified-sources).
+dlt is an open-source Python library that loads data from various, often messy data sources into well-structured datasets. It provides lightweight Python interfaces to extract, load, inspect, and transform data. dlt and the dlt docs are built from the ground up to be used with LLMs: the [LLM-native workflow](dlt-ecosystem/llm-tooling/llm-native-workflow.md) takes you from pipeline code to data in a notebook for over [5,000 sources](https://dlthub.com/workspace).
dlt is designed to be easy to use, flexible, and scalable:
+- dlt extracts data from [REST APIs](./tutorial/rest-api), [SQL databases](./tutorial/sql-database), [cloud storage](./tutorial/filesystem), [Python data structures](./tutorial/load-data-from-an-api), and [many more](./dlt-ecosystem/verified-sources).
- dlt infers [schemas](./general-usage/schema) and [data types](./general-usage/schema/#data-types), [normalizes the data](./general-usage/schema/#data-normalizer), and handles nested data structures.
- dlt supports a variety of [popular destinations](./dlt-ecosystem/destinations/) and has an interface to add [custom destinations](./dlt-ecosystem/destinations/destination) to create reverse ETL pipelines.
-- dlt can be deployed anywhere Python runs, be it on [Airflow](./walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer), [serverless functions](./walkthroughs/deploy-a-pipeline/deploy-with-google-cloud-functions), or any other cloud deployment of your choice.
- dlt automates pipeline maintenance with [incremental loading](./general-usage/incremental-loading), [schema evolution](./general-usage/schema-evolution), and [schema and data contracts](./general-usage/schema-contracts).
+- dlt supports [Python and SQL data access](general-usage/dataset-access/), [transformations](dlt-ecosystem/transformations), [pipeline inspection](general-usage/dashboard.md), and [visualizing data in Marimo notebooks](general-usage/dataset-access/marimo).
+- dlt can be deployed anywhere Python runs, be it on [Airflow](./walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer), [serverless functions](./walkthroughs/deploy-a-pipeline/deploy-with-google-cloud-functions), or any other cloud deployment of your choice.
-To get started with dlt, install the library using pip:
+To get started with dlt, install the library using pip (use a [clean virtual environment](reference/installation) for your experiments!):
```sh
pip install dlt
```
+
:::tip
-We recommend using a clean virtual environment for your experiments! Read the [detailed instructions](./reference/installation) on how to set up one.
+If you'd like to try out dlt without installing it on your machine, check out the [Google Colab demo](https://colab.research.google.com/drive/1NfSB1DpwbbHX9_t5vlalBTf13utwpMGx?usp=sharing) or
+use our simple [marimo / wasm based playground](./tutorial/playground) on this docs page.
:::
## Load data with dlt from …
@@ -71,12 +75,16 @@ pipeline = dlt.pipeline(
load_info = pipeline.run(source)
-# print load info and posts table as dataframe
+# print load info and posts table as data frame
print(load_info)
print(pipeline.dataset().posts.df())
```
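The same preview can also be written with the explicit `table` accessor used elsewhere in these docs (a one-line sketch reusing the `pipeline` above):

```py
# equivalent preview via the explicit table accessor
print(pipeline.dataset().table("posts").df().head())
```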
+:::tip
+LLMs are great at generating REST API pipelines!
+* Follow the [LLM tutorial](dlt-ecosystem/llm-tooling/llm-native-workflow.md) and start with one of [5,000+ sources](https://dlthub.com/workspace).
+* Follow the [REST API source tutorial](./tutorial/rest-api) to learn more about the source configuration and pagination methods.
+:::
-Follow the [REST API source tutorial](./tutorial/rest-api) to learn more about the source configuration and pagination methods.
@@ -97,7 +105,7 @@ pipeline = dlt.pipeline(
load_info = pipeline.run(source)
-# print load info and the "family" table as dataframe
+# print load info and the "family" table as data frame
print(load_info)
print(pipeline.dataset().family.df())
```
@@ -125,7 +133,7 @@ pipeline = dlt.pipeline(
load_info = pipeline.run(resource)
-# print load info and the "example" table as dataframe
+# print load info and the "example" table as data frame
print(load_info)
print(pipeline.dataset().example.df())
```
@@ -135,7 +143,7 @@ Follow the [filesystem source tutorial](./tutorial/filesystem) to learn more abo
-dlt is able to load data from Python generators or directly from Python data structures:
+dlt can load data from Python generators or directly from Python data structures:
```py
import dlt
@@ -152,7 +160,7 @@ pipeline = dlt.pipeline(
load_info = pipeline.run(foo)
-# print load info and the "foo_data" table as dataframe
+# print load info and the "foo_data" table as data frame
print(load_info)
print(pipeline.dataset().foo_data.df())
```
@@ -163,14 +171,8 @@ Check out the [Python data structures tutorial](./tutorial/load-data-from-an-api
-:::tip
-If you'd like to try out dlt without installing it on your machine, check out the [Google Colab demo](https://colab.research.google.com/drive/1NfSB1DpwbbHX9_t5vlalBTf13utwpMGx?usp=sharing) or
-use our simple [marimo / wasm based playground](./tutorial/playground) on this docs page.
-:::
-
## Join the dlt community
1. Give the library a ⭐ and check out the code on [GitHub](https://github.com/dlt-hub/dlt).
1. Ask questions and share how you use the library on [Slack](https://dlthub.com/community).
1. Report problems and make feature requests [here](https://github.com/dlt-hub/dlt/issues/new/choose).
-
diff --git a/docs/website/docs/reference/command-line-interface.md b/docs/website/docs/reference/command-line-interface.md
index 2f65db8df..25129607b 100644
--- a/docs/website/docs/reference/command-line-interface.md
+++ b/docs/website/docs/reference/command-line-interface.md
@@ -54,7 +54,7 @@ dlt [-h] [--version] [--disable-telemetry] [--enable-telemetry]
* [`init`](#dlt-init) - Creates a pipeline project in the current folder by adding existing verified source or creating a new one from template.
* [`render-docs`](#dlt-render-docs) - Renders markdown version of cli docs
* [`deploy`](#dlt-deploy) - Creates a deployment package for a selected pipeline script
-* [`dashboard`](#dlt-dashboard) - Starts the dlt pipeline dashboard
+* [`dashboard`](#dlt-dashboard) - Starts the dlt workspace dashboard
* [`ai`](#dlt-ai) - Use ai-powered development tools and utilities
@@ -145,7 +145,7 @@ Inherits arguments from [`dlt`](#dlt).
**Available subcommands**
* [`info`](#dlt-pipeline-info) - Displays state of the pipeline, use -v or -vv for more info
-* [`show`](#dlt-pipeline-show) - Generates and launches streamlit app with the loading status and dataset explorer
+* [`show`](#dlt-pipeline-show) - Generates and launches workspace dashboard with the loading status and dataset explorer
* [`failed-jobs`](#dlt-pipeline-failed-jobs) - Displays information on all the failed loads in all completed packages, failed jobs and associated error messages
* [`drop-pending-packages`](#dlt-pipeline-drop-pending-packages) - Deletes all extracted and normalized packages including those that are partially loaded.
* [`sync`](#dlt-pipeline-sync) - Drops the local state of the pipeline and resets all the schemas and restores it from destination. the destination state, data and schemas are left intact.
@@ -185,7 +185,7 @@ Inherits arguments from [`dlt pipeline`](#dlt-pipeline).
### `dlt pipeline show`
-Generates and launches Streamlit app with the loading status and dataset explorer.
+Generates and launches the workspace dashboard with the loading status and dataset explorer.
**Usage**
```sh
@@ -194,11 +194,11 @@ dlt pipeline [pipeline_name] show [-h] [--streamlit] [--edit]
**Description**
-Launches the pipeline dashboard app with a comprehensive interface to inspect the pipeline state, schemas, and data in the destination.
+Launches the workspace dashboard with a comprehensive interface to inspect the pipeline state, schemas, and data in the destination.
-This app should be executed from the same folder from which you ran the pipeline script to be able access destination credentials.
+This dashboard should be executed from the same folder from which you ran the pipeline script to be able to access destination credentials.
-If the --edit flag is used, will launch the editable version of the app if it exists in the current directory, or create this version and launch it in edit mode.
+If the --edit flag is used, the command will launch the editable version of the dashboard if it exists in the current directory, or create this version and launch it in edit mode.
Requires `marimo` to be installed in the current environment: `pip install marimo`. Use the --streamlit flag to launch the legacy streamlit app.
@@ -210,8 +210,8 @@ Inherits arguments from [`dlt pipeline`](#dlt-pipeline).
**Options**
* `-h, --help` - Show this help message and exit
-* `--streamlit` - Launch the legacy streamlit dashboard instead of the new pipeline dashboard.
-* `--edit` - Creates editable version of pipeline dashboard in current directory if it does not exist there yet and launches it in edit mode. will have no effect when using the streamlit flag.
+* `--streamlit` - Launch the legacy streamlit dashboard instead of the new workspace dashboard.
+* `--edit` - Creates an editable version of the workspace dashboard in the current directory if it does not exist there yet and launches it in edit mode. Has no effect when using the streamlit flag.
@@ -700,7 +700,7 @@ Inherits arguments from [`dlt deploy`](#dlt-deploy).
## `dlt dashboard`
-Starts the dlt pipeline dashboard.
+Starts the dlt workspace dashboard.
**Usage**
```sh
@@ -709,7 +709,7 @@ dlt dashboard [-h] [--pipelines-dir PIPELINES_DIR] [--edit]
**Description**
-The `dlt dashboard` command starts the dlt pipeline dashboard. You can use the dashboard:
+The `dlt dashboard` command starts the dlt workspace dashboard. You can use the dashboard:
* to list and inspect local pipelines
* browse the full pipeline schema and all hints
diff --git a/docs/website/docs/tutorial/filesystem.md b/docs/website/docs/tutorial/filesystem.md
index c39f7d1ac..ff3fb5c80 100644
--- a/docs/website/docs/tutorial/filesystem.md
+++ b/docs/website/docs/tutorial/filesystem.md
@@ -362,9 +362,13 @@ Check out [other examples](../dlt-ecosystem/verified-sources/filesystem/advanced
Congratulations on completing the tutorial! You've learned how to set up a filesystem source in dlt and run a data pipeline to load the data into DuckDB.
+With your pipeline code ready, we recommend the following next steps:
+- Inspect your pipeline and data in the [workspace dashboard](../general-usage/dashboard.md).
+- [Access your data](../general-usage/dataset-access/) using the `dataset` interface.
+- [Explore your data and create reports](../general-usage/dataset-access/marimo) in Marimo notebooks.
+
Interested in learning more about dlt? Here are some suggestions:
- Learn more about the filesystem source configuration in [filesystem source](../dlt-ecosystem/verified-sources/filesystem)
- Learn more about different credential types in [Built-in credentials](../general-usage/credentials/complex_types#built-in-credentials)
- Learn how to [create a custom source](./load-data-from-an-api.md) in the advanced tutorial
-
diff --git a/docs/website/docs/tutorial/load-data-from-an-api.md b/docs/website/docs/tutorial/load-data-from-an-api.md
index b01bfac62..7ecb7cd02 100644
--- a/docs/website/docs/tutorial/load-data-from-an-api.md
+++ b/docs/website/docs/tutorial/load-data-from-an-api.md
@@ -1,6 +1,6 @@
---
-title: "Build a dlt pipeline"
-description: Build a data pipeline with dlt
+title: Build an advanced dlt pipeline from scratch
+description: Build a custom, production-grade pipeline just by writing code
keywords: [getting started, quick start, basic examples]
---
diff --git a/docs/website/docs/tutorial/rest-api.md b/docs/website/docs/tutorial/rest-api.md
index 2d5688265..79daaf27d 100644
--- a/docs/website/docs/tutorial/rest-api.md
+++ b/docs/website/docs/tutorial/rest-api.md
@@ -321,6 +321,11 @@ Read more about [incremental loading](../dlt-ecosystem/verified-sources/rest_api
Congratulations on completing the tutorial! You've learned how to set up a REST API source in dlt and run a data pipeline to load the data into DuckDB.
+With your pipeline code ready, we recommend the following next steps:
+- Inspect your pipeline and data in the [workspace dashboard](../general-usage/dashboard.md).
+- [Access your data](../general-usage/dataset-access/) using the `dataset` interface, as sketched below.
+- [Explore your data and create reports](../general-usage/dataset-access/marimo) in Marimo notebooks.
+
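A minimal sketch of that dataset access (the pipeline and table names below are placeholders for the ones used in the tutorial):

```py
import dlt

# placeholders: substitute the pipeline and table names from your tutorial run
dataset = dlt.pipeline("rest_api_pipeline").dataset()
print(dataset.table("issues").limit(10).df())
```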
Interested in learning more about dlt? Here are some suggestions:
- Learn more about the REST API source configuration in the [REST API source documentation](../dlt-ecosystem/verified-sources/rest_api/)
diff --git a/docs/website/docs/tutorial/sql-database.md b/docs/website/docs/tutorial/sql-database.md
index a8b4afa70..858baf7d9 100644
--- a/docs/website/docs/tutorial/sql-database.md
+++ b/docs/website/docs/tutorial/sql-database.md
@@ -267,6 +267,11 @@ In the first run of the pipeline `python sql_database_pipeline.py`, the entire t
Congratulations on completing the tutorial! You learned how to set up a SQL Database source in dlt and run a data pipeline to load the data into DuckDB.
+With your pipeline code ready, we recommend the following next steps:
+- Inspect your pipeline and data in the [workspace dashboard](../general-usage/dashboard.md).
+- [Access your data](../general-usage/dataset-access/) using the `dataset` interface.
+- [Explore your data and create reports](../general-usage/dataset-access/marimo) in Marimo notebooks.
+
Interested in learning more about dlt? Here are some suggestions:
- Learn more about the SQL Database source configuration in [the SQL Database source reference](../dlt-ecosystem/verified-sources/sql_database)
- Learn how to extract [single tables and use fast `arrow` and `connectorx` backends](../dlt-ecosystem/verified-sources/sql_database/configuration.md)
diff --git a/docs/website/docs/walkthroughs/create-a-pipeline.md b/docs/website/docs/walkthroughs/create-a-pipeline.md
index 73a417f91..205e4e3ad 100644
--- a/docs/website/docs/walkthroughs/create-a-pipeline.md
+++ b/docs/website/docs/walkthroughs/create-a-pipeline.md
@@ -155,7 +155,7 @@ You will need to install `pip dlt[workspace]`
dlt pipeline github_api_pipeline show
```
-This will open the pipeline dashboard app that gives you an overview of the data loaded.
+This will open the workspace dashboard app that gives you an overview of the data loaded.
## 5. Next steps
diff --git a/docs/website/docs/walkthroughs/run-a-pipeline.md b/docs/website/docs/walkthroughs/run-a-pipeline.md
index 999f9005a..a0c99eead 100644
--- a/docs/website/docs/walkthroughs/run-a-pipeline.md
+++ b/docs/website/docs/walkthroughs/run-a-pipeline.md
@@ -89,7 +89,7 @@ table, do SQL queries, etc., by executing the following command from the same fo
dlt pipeline chess_pipeline show
```
-This will launch the pipeline dashboard, which you can open in your browser:
+This will launch the workspace dashboard, which you can open in your browser:
```text
Found pipeline chess_pipeline in /home/user-name/.dlt/pipelines
diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js
index e6bbe559b..3e5f824ae 100644
--- a/docs/website/sidebars.js
+++ b/docs/website/sidebars.js
@@ -37,6 +37,7 @@ const sidebars = {
},
items: [
'reference/installation',
+ "dlt-ecosystem/llm-tooling/llm-native-workflow",
'tutorial/rest-api',
'tutorial/sql-database',
'tutorial/filesystem',
@@ -115,7 +116,7 @@ const sidebars = {
},
items: [
'dlt-ecosystem/verified-sources/rest_api/basic',
- 'dlt-ecosystem/verified-sources/rest_api/advanced',
+ 'dlt-ecosystem/verified-sources/rest_api/advanced',
{
type: 'category',
label: 'REST API helpers',
@@ -128,6 +129,12 @@ const sidebars = {
'general-usage/http/requests',
]
},
+ {
+ type: 'link',
+ label: '5k+ REST APIs with LLMs',
+ description: 'Pick one of 5k+ REST APIs from LLM context',
+ href: 'https://dlthub.com/workspace',
+ },
]
},
{
@@ -238,7 +245,7 @@ const sidebars = {
items: [
'walkthroughs/create-a-pipeline',
'walkthroughs/run-a-pipeline',
- {
+ /*{
type: "category",
label: "Build with LLMs",
link: {
@@ -250,7 +257,7 @@ const sidebars = {
items: [
"dlt-ecosystem/llm-tooling/llm-native-workflow",
]
- },
+ },*/
{
type: 'category',
label: 'Load data incrementally',
@@ -477,7 +484,7 @@ const sidebars = {
items: [
'hub/intro',
'hub/getting-started/installation',
- 'dlt-ecosystem/llm-tooling/llm-native-workflow',
+ { type: 'ref', id: 'dlt-ecosystem/llm-tooling/llm-native-workflow' },
]
},
{
@@ -501,7 +508,7 @@ const sidebars = {
type: 'category',
label: 'Ensure data quality',
items: [
- 'general-usage/dashboard',
+ { type: 'ref', id: 'general-usage/dashboard' },
'hub/features/mcp-server',
'hub/features/quality/data-quality',
]
@@ -510,8 +517,8 @@ const sidebars = {
type: 'category',
label: 'Create reports and transformations',
items: [
- 'general-usage/dataset-access/marimo',
- 'general-usage/dataset-access/dataset',
+ { type: 'ref', id: 'general-usage/dataset-access/marimo' },
+ { type: 'ref', id: 'general-usage/dataset-access/dataset' },
'hub/features/transformations/index',
'hub/features/transformations/dbt-transformations',
]
@@ -553,20 +560,20 @@ const sidebars = {
],
};
-// insert examples
+ // insert examples
for (const item of sidebars.docsSidebar) {
- if (item.label === 'Code examples') {
- for (let examplePath of walkSync("./docs_processed/examples")) {
- examplePath = examplePath.replace("docs_processed/", "");
- examplePath = examplePath.replace(".mdx", "");
- examplePath = examplePath.replace(".md", "");
- item.items.push(examplePath);
+ if (item.label === 'Code examples') {
+ for (let examplePath of walkSync("./docs_processed/examples")) {
+ examplePath = examplePath.replace("docs_processed/", "");
+ examplePath = examplePath.replace(".mdx", "");
+ examplePath = examplePath.replace(".md", "");
+ item.items.push(examplePath);
}
}
}
-// inject api reference if it exists
+ // inject api reference if it exists
if (fs.existsSync('./docs_processed/api_reference/sidebar.json')) {
for (const item of sidebars.docsSidebar) {
if (item.label === 'Reference') {
diff --git a/tests/e2e/helpers/dashboard/test_e2e.py b/tests/e2e/helpers/dashboard/test_e2e.py
index fe446b7f3..8c5c356fb 100644
--- a/tests/e2e/helpers/dashboard/test_e2e.py
+++ b/tests/e2e/helpers/dashboard/test_e2e.py
@@ -131,13 +131,13 @@ def test_page_overview(page: Page):
_go_home(page)
# check title
- expect(page).to_have_title("dlt pipeline dashboard")
+ expect(page).to_have_title("dlt workspace dashboard")
# check top heading
expect(
- page.get_by_role("heading", name="Welcome to the dltHub pipeline dashboard...")
+ page.get_by_role("heading", name="Welcome to the dltHub workspace dashboard...")
).to_contain_text(
- "Welcome to the dltHub pipeline dashboard..."
+ "Welcome to the dltHub workspace dashboard..."
) #
#