Files
dlt/tests/workspace/helpers/dashboard/example_pipelines.py
anuunchin 6f925caa89 Fix/3464 sync error results in success label (#3492)
* Last executed info

* Any step failure results in failure badge

* Test adjustments
2025-12-17 17:12:32 +01:00

291 lines
9.1 KiB
Python

#
# A list of pipelines that are executed and are in various states for testing the dashboard
# TODO: generalize these and make them available for other tests
# TODO: consolidate these test pipelines with the ones in tests/e2e/helpers/dashboard
#
from typing import Any
from unittest.mock import patch
import duckdb
import dlt
import pytest
from dlt._workspace._templates._single_file_templates.fruitshop_pipeline import (
fruitshop as fruitshop_source,
)
from dlt._workspace._templates._single_file_templates.arrow_pipeline import (
resource as humans,
)
from dlt.common.destination.exceptions import (
DestinationTerminalException,
)
# Canonical names for the test pipelines created by the factory functions below.
SUCCESS_PIPELINE_DUCKDB = "success_pipeline_duckdb"
SUCCESS_PIPELINE_FILESYSTEM = "success_pipeline_filesystem"
EXTRACT_EXCEPTION_PIPELINE = "extract_exception_pipeline"
NORMALIZE_EXCEPTION_PIPELINE = "normalize_exception_pipeline"
# NOTE(review): "pipline" is misspelled in the value; kept as-is since the name
# is only ever referenced through this constant — confirm before renaming.
NEVER_RAN_PIPELINE = "never_ran_pipline"
LOAD_EXCEPTION_PIPELINE = "load_exception_pipeline"
NO_DESTINATION_PIPELINE = "no_destination_pipeline"
SYNC_EXCEPTION_PIPELINE = "sync_exception_pipeline"

# Every pipeline the __main__ section creates for manual dashboard testing.
ALL_PIPELINES = [
    SUCCESS_PIPELINE_DUCKDB,
    EXTRACT_EXCEPTION_PIPELINE,
    NORMALIZE_EXCEPTION_PIPELINE,
    NEVER_RAN_PIPELINE,
    LOAD_EXCEPTION_PIPELINE,
    NO_DESTINATION_PIPELINE,
    SUCCESS_PIPELINE_FILESYSTEM,
    SYNC_EXCEPTION_PIPELINE,
]

# Pipelines whose factory deliberately triggers a failure in some pipeline step.
PIPELINES_WITH_EXCEPTIONS = [
    EXTRACT_EXCEPTION_PIPELINE,
    NORMALIZE_EXCEPTION_PIPELINE,
    LOAD_EXCEPTION_PIPELINE,
    SYNC_EXCEPTION_PIPELINE,
]

# Pipelines that complete at least one successful load.
PIPELINES_WITH_LOAD = [SUCCESS_PIPELINE_DUCKDB, SUCCESS_PIPELINE_FILESYSTEM]
def run_success_pipeline(pipeline: dlt.Pipeline) -> None:
    """
    Create a success pipeline with
    * fruitshop dataset
    * child table
    * second schema
    * incremental column

    Runs three loads against `pipeline`:
    1. the full fruitshop source into the "fruitshop" schema,
    2. a `purchases` resource (merge, incremental on "id") that produces a
       nested `child` table, an incomplete column, and a custom column hint,
    3. the fruitshop `customers` resource into a second schema
       ("fruitshop_customers") under the table name "other_customers",
       with explicit nested-table column hints two levels deep.
    """
    pipeline.run(fruitshop_source(), schema=dlt.Schema("fruitshop"))

    # we overwrite the purchases table to have a child table and an incomplete column
    # NOTE: the resource function name "purchases" determines the table name,
    # so it must match the table loaded by the fruitshop source above.
    @dlt.resource(  # type: ignore
        primary_key="id",
        write_disposition="merge",
        # "incomplete" has no data type/hints; "id" carries a custom x- hint
        columns={"incomplete": {}, "id": {"x-custom": "foo"}},
    )
    def purchases(inc_id=dlt.sources.incremental("id")):
        """Load purchases data from a simple python list."""
        yield [
            {
                "id": 1,
                "customer_id": 1,
                "inventory_id": 1,
                "quantity": 1,
                # nested list yields a child table purchases__child
                "child": [
                    {"id": 1, "name": "child 1"},
                    {"id": 2, "name": "child 2"},
                    {"id": 3, "name": "child 3"},
                ],
            },
            {"id": 2, "customer_id": 1, "inventory_id": 2, "quantity": 2},
            {"id": 3, "customer_id": 2, "inventory_id": 3, "quantity": 3},
        ]

    pipeline.run(purchases(), schema=dlt.Schema("fruitshop"))

    # write something to another schema so we have multiple schemas
    customers = fruitshop_source().customers
    # hint the nested child tables explicitly (two nesting levels)
    customers.apply_hints(
        nested_hints={
            ("child",): dlt.mark.make_nested_hints(columns=[{"name": "name", "data_type": "text"}]),
            ("child", "child_inner"): dlt.mark.make_nested_hints(
                columns=[{"name": "name", "data_type": "text"}]
            ),
        },
    )
    pipeline.run(
        customers,
        schema=dlt.Schema("fruitshop_customers"),
        table_name="other_customers",
    )
def create_success_pipeline_duckdb(pipelines_dir: str = None, db_conn: Any = None):
    """Create a test pipeline with in memory duckdb destination, properties see `run_success_pipeline`

    Args:
        pipelines_dir: Optional working directory for pipeline state.
        db_conn: Optional duckdb connection to use as credentials; when None,
            the duckdb destination falls back to its default database.

    Returns:
        The pipeline after a successful run.
    """
    # `db_conn if db_conn else None` was equivalent to plain `db_conn`
    # (both branches yield db_conn's value when it is falsy), so pass it directly.
    # The previous local `import duckdb` was unused here and has been dropped.
    pipeline = dlt.pipeline(
        pipeline_name=SUCCESS_PIPELINE_DUCKDB,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=db_conn),
    )
    run_success_pipeline(pipeline)
    return pipeline
def create_success_pipeline_filesystem(
    pipelines_dir: str = None, bucket_url: str = "_storage/data"
):
    """Create a test pipeline with filesystem destination, properties see `run_success_pipeline`"""
    # build the destination first, then wire it into the pipeline
    destination = dlt.destinations.filesystem(bucket_url=bucket_url)
    pipeline = dlt.pipeline(
        pipeline_name=SUCCESS_PIPELINE_FILESYSTEM,
        pipelines_dir=pipelines_dir,
        destination=destination,
    )
    run_success_pipeline(pipeline)
    return pipeline
def create_fruitshop_duckdb_with_shared_dataset(pipelines_dir: str = None):
    """Create a test pipeline with in memory duckdb destination, properties see `run_success_pipeline`

    Loads into the shared dataset "test_shared_dataset" so it can be inspected
    together with the humans pipeline that targets the same dataset.
    """
    # uses the module-level `duckdb` import; the redundant local import was removed
    pipeline = dlt.pipeline(
        pipeline_name="fruits_test_pipeline",
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=duckdb.connect(":memory:/data.db")),
        dataset_name="test_shared_dataset",
    )
    run_success_pipeline(pipeline)
    return pipeline
def create_humans_arrow_duckdb_with_shared_dataset(
    pipelines_dir: str = None,
):
    """Create a test pipeline with a duckdb destination loading the arrow `humans` resource.

    Fixed docstring: the previous text wrongly said "filesystem destination".
    Loads into the shared dataset "test_shared_dataset" alongside the
    fruitshop pipeline so both appear in one dataset.
    """
    pipeline = dlt.pipeline(
        pipeline_name="humans_test_pipeline",
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=duckdb.connect(":memory:/data.db")),
        dataset_name="test_shared_dataset",
    )
    pipeline.run(humans())
    return pipeline
def create_extract_exception_pipeline(pipelines_dir: str = None):
    """Create a duckdb-backed test pipeline whose extract step raises."""
    # module-level duckdb import is in scope; use a named connection for clarity
    connection = duckdb.connect(":memory:")
    pipeline = dlt.pipeline(
        pipeline_name=EXTRACT_EXCEPTION_PIPELINE,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=connection),
    )

    @dlt.resource
    def broken_resource():
        raise AssertionError("I am broken")

    # the run is expected to fail during extract
    with pytest.raises(Exception):
        pipeline.run(broken_resource())
    return pipeline
def create_normalize_exception_pipeline(pipelines_dir: str = None):
    """Create a duckdb-backed test pipeline whose normalize step raises."""
    connection = duckdb.connect(":memory:")
    pipeline = dlt.pipeline(
        pipeline_name=NORMALIZE_EXCEPTION_PIPELINE,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=connection),
    )

    @dlt.resource
    def data_with_type_conflict():
        # First yield double, then string for same column - causes normalize failure with strict schema contract
        yield [{"id": 1, "value": 123.4}]
        yield [{"id": 2, "value": "string"}]

    # freezing the data_type contract makes the second yield fail normalization
    with pytest.raises(Exception):
        pipeline.run(
            data_with_type_conflict(),
            schema=dlt.Schema("fruitshop"),
            table_name="items",
            schema_contract={"data_type": "freeze"},
        )
    return pipeline
def create_never_ran_pipeline(pipelines_dir: str = None):
    """Create a test pipeline with duckdb destination which never was run"""
    # intentionally no `run` call: the dashboard should show a pipeline
    # that exists but has no trace or load history
    return dlt.pipeline(
        pipeline_name=NEVER_RAN_PIPELINE,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=duckdb.connect(":memory:")),
    )
def create_load_exception_pipeline(pipelines_dir: str = None):
    """Create a test pipeline with duckdb destination, raises an exception in the load step

    The dummy destination with a very short timeout is what provokes the load
    failure; the run is expected to raise.
    """
    # NOTE(review): removed the unused `failing_destination` helper
    # (a @dlt.destination raising DestinationTerminalException) — it was
    # defined but never wired into the pipeline, which uses `dummy` instead.
    pipeline = dlt.pipeline(
        pipeline_name=LOAD_EXCEPTION_PIPELINE,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.dummy(timeout=0.1),
    )
    with pytest.raises(Exception):
        pipeline.run([1, 2, 3], table_name="items", schema=dlt.Schema("fruitshop"))
    return pipeline
def create_no_destination_pipeline(pipelines_dir: str = None):
    """Create a test pipeline with no destination"""
    # no destination and no run: the dashboard should handle this state too
    return dlt.pipeline(
        pipeline_name=NO_DESTINATION_PIPELINE,
        pipelines_dir=pipelines_dir,
    )
def create_sync_exception_pipeline(pipelines_dir: str = None):
    """Create a test pipeline that raises an exception in the sync step

    Patches the pipeline's state-restore hook so `run` fails before extract,
    then asserts the failure is attributed to the sync step.
    """
    pipeline = dlt.pipeline(
        pipeline_name=SYNC_EXCEPTION_PIPELINE,
        pipelines_dir=pipelines_dir,
        destination=dlt.destinations.duckdb(credentials=duckdb.connect(":memory:")),
    )

    @dlt.resource
    def dummy_data():
        yield [{"id": 1, "value": "test"}]

    # force the sync step to fail by making state restoration raise
    with patch.object(pipeline, "_restore_state_from_destination") as mock_restore:
        mock_restore.side_effect = ConnectionError("Cannot connect to destination for sync")
        with pytest.raises(Exception) as excinfo:
            pipeline.run(dummy_data())
        # check the raised exception's message, not the ExceptionInfo wrapper:
        # str(excinfo) stringifies the pytest wrapper object, str(excinfo.value)
        # is the canonical way to inspect the exception text
        assert "failed at `step=sync`" in str(excinfo.value)
    return pipeline
# NOTE: this script can be run to create the test pipelines globally for manual testing of the dashboard app and cli
if __name__ == "__main__":
    # same creation order as before, expressed as a single loop
    for _factory in (
        create_success_pipeline_duckdb,
        create_success_pipeline_filesystem,
        create_extract_exception_pipeline,
        create_normalize_exception_pipeline,
        create_never_ran_pipeline,
        create_load_exception_pipeline,
        create_no_destination_pipeline,
        create_sync_exception_pipeline,
    ):
        _factory()