# dlt/tests/pipeline/test_pipeline_trace.py
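"""Tests for the dlt pipeline run trace: trace creation and contents, serialization,
saving/loading from the pipeline working directory, anonymous telemetry events and
Slack notifications."""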
from copy import deepcopy
import io
import os
import asyncio
import datetime  # noqa: 251
from typing import Any, List
from unittest.mock import patch

import pytest
import requests_mock
import yaml

import dlt
from dlt.common import json
from dlt.common.configuration.specs import CredentialsConfiguration, RuntimeConfiguration
from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContainer
from dlt.common.pipeline import ExtractInfo, NormalizeInfo, LoadInfo
from dlt.common.schema import Schema
from dlt.common.runtime.telemetry import stop_telemetry
from dlt.common.typing import DictStrAny, DictStrStr, TSecretValue
from dlt.common.utils import digest128
from dlt.destinations import dummy, filesystem
from dlt.pipeline.exceptions import PipelineStepFailed
from dlt.pipeline.pipeline import Pipeline
from dlt.pipeline.trace import (
    PipelineTrace,
    SerializableResolvedValueTrace,
    load_trace,
)
from dlt.pipeline.track import slack_notify_load_success
from dlt.extract import DltResource, DltSource
from dlt.extract.extract import describe_extract_data
from dlt.extract.pipe import Pipe

from tests.pipeline.utils import PIPELINE_TEST_CASES_PATH
from tests.utils import TEST_STORAGE_ROOT, start_test_telemetry, temporary_telemetry


def test_create_trace(toml_providers: ConfigProvidersContainer, environment: Any) -> None:
    dlt.secrets["load.delete_completed_jobs"] = True

    @dlt.source
    def inject_tomls(
        api_type=dlt.config.value,
        credentials: CredentialsConfiguration = dlt.secrets.value,
        secret_value: TSecretValue = TSecretValue("123"),  # noqa: B008
    ):
        @dlt.resource(write_disposition="replace", primary_key="id")
        def data():
            yield [{"id": 1}, {"id": 2}, {"id": 3}]

        return data()

    p = dlt.pipeline(destination="dummy")

    # read from secrets and configs directly
    databricks_creds = "databricks+connector://token:<databricks_token>@<databricks_host>:443/<database_or_schema_name>?conn_timeout=15&search_path=a,b,c"
    s = dlt.secrets["databricks.credentials"]
    assert s == databricks_creds

    extract_info = p.extract(inject_tomls())
    trace = p.last_trace
    assert trace is not None
    # assert p._trace is None
    assert len(trace.steps) == 1
    step = trace.steps[0]
    assert step.step == "extract"
    assert isinstance(step.started_at, datetime.datetime)
    assert isinstance(step.finished_at, datetime.datetime)
    assert isinstance(step.step_info, ExtractInfo)
    assert step.step_info.extract_data_info == [{"name": "inject_tomls", "data_type": "source"}]

    # check infos
    extract_info = p.last_trace.last_extract_info
    assert isinstance(extract_info, ExtractInfo)
    # should have single job and single load id
    assert len(extract_info.loads_ids) == 1
    load_id = extract_info.loads_ids[0]
    assert len(extract_info.metrics) == 1
    # extract of data in the first one
    metrics = extract_info.metrics[load_id][0]
    # inject tomls and dlt state
    assert len(metrics["job_metrics"]) == 1
    assert "data" in metrics["table_metrics"]
    assert set(metrics["resource_metrics"].keys()) == {"data"}
    assert metrics["schema_name"] == "inject_tomls"
    # check dag and hints
    assert metrics["dag"] == [("data", "data")]
    assert metrics["hints"]["data"] == {"write_disposition": "replace", "primary_key": "id"}

    metrics = extract_info.metrics[load_id][1]
    # inject tomls and dlt state
    assert len(metrics["job_metrics"]) == 1
    assert "_dlt_pipeline_state" in metrics["table_metrics"]
    assert set(metrics["resource_metrics"].keys()) == {"_dlt_pipeline_state"}
    assert metrics["schema_name"] == "inject_tomls"
    # check dag and hints
    assert metrics["dag"] == [("_dlt_pipeline_state", "_dlt_pipeline_state")]
    # state has explicit columns set
    assert metrics["hints"]["_dlt_pipeline_state"]["original_columns"] == "dict"

    # check packages
    assert len(extract_info.load_packages) == 1
    # two jobs
    print(extract_info.load_packages[0])
    assert len(extract_info.load_packages[0].jobs["new_jobs"]) == 2
    assert extract_info.load_packages[0].state == "extracted"

    # check config trace
    resolved = _find_resolved_value(trace.resolved_config_values, "api_type", [])
    assert resolved.config_type_name == "TestCreateTraceInjectTomlsConfiguration"
    assert resolved.value == "REST"
    assert resolved.is_secret_hint is False
    assert resolved.default_value is None
    assert resolved.provider_name == "config.toml"
    # dictionaries are not returned anymore, secrets are masked
    resolved = _find_resolved_value(trace.resolved_config_values, "credentials", [])
    assert resolved is None or isinstance(resolved.value, str)
    resolved = _find_resolved_value(trace.resolved_config_values, "secret_value", [])
    assert resolved.is_secret_hint is True
    assert resolved.value is None, "Credential is not masked"
    assert resolved.default_value is None, "Credential is not masked"
    resolved = _find_resolved_value(trace.resolved_config_values, "credentials", ["databricks"])
    assert resolved.is_secret_hint is True
    assert resolved.value is None, "Credential is not masked"
    assert_trace_serializable(trace)
    # activate pipeline because other was running in assert trace
    p.activate()

    # extract with exception
    @dlt.source
    def async_exception(max_range=1):
        async def get_val(v):
            await asyncio.sleep(0.1)
            if v % 3 == 0:
                raise ValueError(v)
            return v

        @dlt.resource
        def data():
            yield from [get_val(v) for v in range(1, max_range)]

        return data()

    with pytest.raises(PipelineStepFailed):
        p.extract(async_exception())
    trace = p.last_trace
    assert p._trace is None
    assert len(trace.steps) == 2
    step = trace.steps[1]
    assert step.step == "extract"
    assert isinstance(step.step_exception, str)
    assert isinstance(step.step_info, ExtractInfo)
    assert len(step.exception_traces) > 0
    assert step.step_info.extract_data_info == [{"name": "async_exception", "data_type": "source"}]
    assert_trace_serializable(trace)

    extract_info = step.step_info
    # only new (unprocessed) package is present, all other metrics are empty, state won't be extracted
    assert len(extract_info.loads_ids) == 1
    load_id = extract_info.loads_ids[0]
    package = extract_info.load_packages[0]
    assert package.state == "new"
    # no jobs - exceptions happened before save
    assert len(package.jobs["new_jobs"]) == 0
    # metrics should be collected
    assert len(extract_info.metrics[load_id]) == 1

    # normalize
    norm_info = p.normalize()
    trace = p.last_trace
    assert p._trace is None
    assert len(trace.steps) == 3
    step = trace.steps[2]
    assert step.step == "normalize"
    assert step.step_info is norm_info
    assert_trace_serializable(trace)
    assert isinstance(p.last_trace.last_normalize_info, NormalizeInfo)
    assert p.last_trace.last_normalize_info.row_counts == {"_dlt_pipeline_state": 1, "data": 3}
    assert len(norm_info.loads_ids) == 1
    load_id = norm_info.loads_ids[0]
    assert len(norm_info.metrics) == 1
    # just one load package with single metrics
    assert len(norm_info.metrics[load_id]) == 1
    norm_metrics = norm_info.metrics[load_id][0]
    # inject tomls and dlt state
    assert len(norm_metrics["job_metrics"]) == 2
    assert "data" in norm_metrics["table_metrics"]

    # check packages
    assert len(norm_info.load_packages) == 1
    # two jobs
    assert len(norm_info.load_packages[0].jobs["new_jobs"]) == 2
    assert norm_info.load_packages[0].state == "normalized"

    # load
    os.environ["COMPLETED_PROB"] = "1.0"  # make it complete immediately
    load_info = p.load()
    trace = p.last_trace
    assert p._trace is None
    assert len(trace.steps) == 4
    step = trace.steps[3]
    assert step.step == "load"
    assert step.step_info is load_info
    load_info = step.step_info  # type: ignore[assignment]
    # check packages
    assert len(load_info.load_packages) == 1
    # two jobs
    assert load_info.load_packages[0].state == "loaded"
    assert len(load_info.load_packages[0].jobs["completed_jobs"]) == 2
    resolved = _find_resolved_value(trace.resolved_config_values, "completed_prob", [])
    assert resolved.is_secret_hint is False
    assert resolved.value == "1.0"
    assert resolved.config_type_name == "DummyClientConfiguration"
    assert_trace_serializable(trace)
    assert isinstance(p.last_trace.last_load_info, LoadInfo)
    p.activate()

    # run resets the trace
    load_info = inject_tomls().run()
    trace = p.last_trace
    assert p._trace is None
    assert len(trace.steps) == 4  # extract, normalize, load, run
    step = trace.steps[-1]  # the last one should be run
    assert step.step == "run"
    assert step.step_info is load_info
    assert trace.steps[0].step_info is not extract_info

    step = trace.steps[-2]  # the previous one should be load
    assert step.step == "load"
    assert step.step_info is load_info  # same load info
    assert trace.steps[0].step_info is not extract_info
    assert_trace_serializable(trace)

    assert isinstance(p.last_trace.last_load_info, LoadInfo)
    assert isinstance(p.last_trace.last_normalize_info, NormalizeInfo)
    assert isinstance(p.last_trace.last_extract_info, ExtractInfo)


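# Runs pipelines that exercise many schema hints plus an exception trace, dumps the
# resulting traces, and checks that the inferred trace schema loads under the stored contract.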
def test_trace_schema() -> None:
    os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "True"
    os.environ["RESTORE_FROM_DESTINATION"] = "False"
    # mock runtime env
    os.environ["CIRCLECI"] = "1"
    os.environ["AWS_LAMBDA_FUNCTION_NAME"] = "lambda"

    @dlt.source(section="many_hints")
    def many_hints(
        api_type=dlt.config.value,
        credentials: str = dlt.secrets.value,
        secret_value: TSecretValue = TSecretValue("123"),  # noqa: B008
    ):
        # TODO: create table / column schema from typed dicts, not explicitly
        @dlt.resource(
            write_disposition="replace",
            primary_key="id",
            table_format="delta",
            file_format="jsonl",
            schema_contract="evolve",
            columns=[
                {
                    "name": "multi",
                    "data_type": "decimal",
                    "nullable": True,
                    "cluster": True,
                    "description": "unknown",
                    "merge_key": True,
                    "precision": 9,
                    "scale": 3,
                    "sort": True,
                    "variant": True,
                    "partition": True,
                }
            ],
        )
        def data():
            yield [{"id": 1, "multi": "1.2"}, {"id": 2}, {"id": 3}]

        return data()

    @dlt.source
    def github():
        @dlt.resource
        def get_shuffled_events():
            for _ in range(1):
                with open(
                    "tests/normalize/cases/github.events.load_page_1_duck.json",
                    "r",
                    encoding="utf-8",
                ) as f:
                    issues = json.load(f)
                    yield issues

        return get_shuffled_events()

    @dlt.source
    def async_exception(max_range=1):
        async def get_val(v):
            await asyncio.sleep(0.1)
            if v % 3 == 0:
                raise ValueError(v)
            return v

        @dlt.resource
        def data():
            yield from [get_val(v) for v in range(1, max_range)]

        return data()

    # create pipeline with staging to get remote_url in load step job_metrics
    dummy_dest = dummy(completed_prob=1.0)
    pipeline = dlt.pipeline(
        pipeline_name="test_trace_schema",
        destination=dummy_dest,
        staging=filesystem(os.path.abspath(os.path.join(TEST_STORAGE_ROOT, "_remote_filesystem"))),
        dataset_name="various",
    )

    # mock config
    os.environ["API_TYPE"] = "REST"
    os.environ["SOURCES__MANY_HINTS__CREDENTIALS"] = "CREDS"

    pipeline.run([many_hints(), github()])
    trace = pipeline.last_trace
    pipeline._schema_storage.storage.save("trace.json", json.dumps(trace, pretty=True))

    schema = dlt.Schema("trace")
    trace_pipeline = dlt.pipeline(
        pipeline_name="test_trace_schema_traces", destination=dummy(completed_prob=1.0)
    )
    trace_pipeline.run([trace], table_name="trace", schema=schema)

    # add exception trace
    with pytest.raises(PipelineStepFailed):
        pipeline.extract(async_exception(max_range=4))
    trace_exception = pipeline.last_trace
    pipeline._schema_storage.storage.save(
        "trace_exception.json", json.dumps(trace_exception, pretty=True)
    )
    trace_pipeline.run([trace_exception], table_name="trace")
    inferred_trace_contract = trace_pipeline.schemas["trace"]
    inferred_contract_str = inferred_trace_contract.to_pretty_yaml(remove_processing_hints=True)

    # NOTE: this saves actual inferred contract (schema) to schema storage, move it to test cases if you update
    # trace shapes
    # TODO: create a proper schema for dlt trace and tables/columns
    pipeline._schema_storage.storage.save("trace.schema.yaml", inferred_contract_str)
    # print(pipeline._schema_storage.storage.storage_path)

    # load the schema and use it as contract
    with open(f"{PIPELINE_TEST_CASES_PATH}/contracts/trace.schema.yaml", encoding="utf-8") as f:
        imported_schema = yaml.safe_load(f)
    trace_contract = Schema.from_dict(imported_schema, remove_processing_hints=True)
    # compare pretty forms of the schemas, they must be identical
    # NOTE: if this fails you can comment this out and use contract run below to find first offending difference
    # assert trace_contract.to_pretty_yaml() == inferred_contract_str

    # use trace contract to load data again
    contract_trace_pipeline = dlt.pipeline(
        pipeline_name="test_trace_schema_traces_contract", destination=dummy(completed_prob=1.0)
    )
    contract_trace_pipeline.run(
        [trace_exception, trace],
        table_name="trace",
        schema=trace_contract,
        schema_contract="freeze",
    )

    # assert inferred_trace_contract.version_hash == trace_contract.version_hash
    # print(trace_pipeline.schemas["trace"].to_pretty_yaml())
    # print(pipeline._schema_storage.storage.storage_path)


# def test_trace_schema_contract() -> None:


def test_save_load_trace() -> None:
    os.environ["COMPLETED_PROB"] = "1.0"
    info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy")
    pipeline = dlt.pipeline()
    # will get trace from working dir
    trace = pipeline.last_trace
    assert trace is not None
    assert pipeline._trace is None
    assert len(trace.steps) == 4 == len(info.pipeline.last_trace.steps)  # type: ignore[attr-defined]
    step = trace.steps[-2]  # the previous to last one should be load
    assert step.step == "load"
    resolved = _find_resolved_value(trace.resolved_config_values, "completed_prob", [])
    assert resolved.is_secret_hint is False
    assert resolved.value == "1.0"
    assert resolved.config_type_name == "DummyClientConfiguration"
    assert_trace_serializable(trace)
    # check row counts
    assert pipeline.last_trace.last_normalize_info.row_counts == {
        "_dlt_pipeline_state": 1,
        "data": 3,
    }
    # reactivate the pipeline
    pipeline.activate()

    # load trace and check if all elements are present
    loaded_trace = load_trace(pipeline.working_dir)
    print(loaded_trace.asstr(2))
    assert len(trace.steps) == 4
    loaded_trace_dict = deepcopy(loaded_trace.asdict())
    trace_dict = deepcopy(trace.asdict())
    assert loaded_trace_dict == trace_dict
    # do it again to check if we are not popping
    assert loaded_trace_dict == loaded_trace.asdict()
    assert trace_dict == trace.asdict()

    # exception also saves trace
    @dlt.resource
    def data():
        raise NotImplementedError()
        yield

    with pytest.raises(PipelineStepFailed) as py_ex:
        dlt.run(data(), destination="dummy")
    # there's the same pipeline in exception as in previous run
    assert py_ex.value.pipeline is info.pipeline
    trace = load_trace(py_ex.value.pipeline.working_dir)
    assert trace is not None
    assert pipeline._trace is None
    assert len(trace.steps) == 2  # extract with exception, also has run with exception
    step = trace.steps[-2]
    assert step.step == "extract"
    assert step.step_exception is not None
    run_step = trace.steps[-1]
    assert run_step.step == "run"
    assert run_step.step_exception is not None
    assert step.step_exception == run_step.step_exception
    assert_trace_serializable(trace)
    assert pipeline.last_trace.last_normalize_info is None


def test_save_load_empty_trace() -> None:
    os.environ["COMPLETED_PROB"] = "1.0"
    os.environ["RESTORE_FROM_DESTINATION"] = "false"
    pipeline = dlt.pipeline()
    pipeline.run([], table_name="data", destination="dummy")
    trace = pipeline.last_trace
    assert_trace_serializable(trace)
    assert len(trace.steps) == 4

    pipeline.activate()

    # load trace and check if all elements are present
    loaded_trace = load_trace(pipeline.working_dir)
    print(loaded_trace.asstr(2))
    assert len(trace.steps) == 4
    loaded_trace_dict = deepcopy(loaded_trace.asdict())
    trace_dict = deepcopy(trace.asdict())
    assert loaded_trace_dict == trace_dict
    # do it again to check if we are not popping
    assert loaded_trace_dict == loaded_trace.asdict()
    assert trace_dict == trace.asdict()


def test_disable_trace(environment: DictStrStr) -> None:
    environment["ENABLE_RUNTIME_TRACE"] = "false"
    environment["COMPLETED_PROB"] = "1.0"
    dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy")
    assert dlt.pipeline().last_trace is None


def test_trace_on_restore_state(environment: DictStrStr) -> None:
    environment["COMPLETED_PROB"] = "1.0"

    def _sync_destination_patch(
        self: Pipeline, destination: str = None, staging: str = None, dataset_name: str = None
    ):
        # just wipe the pipeline simulating deleted dataset
        self._wipe_working_folder()
        self._configure(
            self._schema_storage_config.export_schema_path,
            self._schema_storage_config.import_schema_path,
            False,
        )

    with patch.object(Pipeline, "sync_destination", _sync_destination_patch):
        dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy")
        assert len(dlt.pipeline().last_trace.steps) == 4
        assert dlt.pipeline().last_trace.last_normalize_info.row_counts == {
            "_dlt_pipeline_state": 1,
            "data": 3,
        }


def test_load_none_trace() -> None:
    p = dlt.pipeline()
    assert load_trace(p.working_dir) is None


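# Telemetry assertions below rely on patching the sentry and anonymous tracker
# `before_send` hooks with the collectors defined at the bottom of this module.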
def test_trace_telemetry(temporary_telemetry: RuntimeConfiguration) -> None:
    with (
        patch("dlt.common.runtime.sentry.before_send", _mock_sentry_before_send),
        patch("dlt.common.runtime.anon_tracker.before_send", _mock_anon_tracker_before_send),
    ):
        ANON_TRACKER_SENT_ITEMS.clear()
        SENTRY_SENT_ITEMS.clear()
        # make dummy fail all files
        os.environ["FAIL_PROB"] = "1.0"
        # but do not raise exceptions
        os.environ["RAISE_ON_FAILED_JOBS"] = "false"
        load_info = dlt.pipeline().run(
            [1, 2, 3], table_name="data", destination="dummy", dataset_name="data_data"
        )
        # we should have 4 tracker items
        assert len(ANON_TRACKER_SENT_ITEMS) == 4
        expected_steps = ["extract", "normalize", "load", "run"]
        for event, step in zip(ANON_TRACKER_SENT_ITEMS, expected_steps):
            assert event["event"] == f"pipeline_{step}"
            assert event["properties"]["success"] is True
            assert event["properties"]["destination_name"] == "dummy"
            assert event["properties"]["destination_type"] == "dlt.destinations.dummy"
            assert event["properties"]["pipeline_name_hash"] == digest128(
                load_info.pipeline.pipeline_name
            )
            assert event["properties"]["dataset_name_hash"] == digest128(
                load_info.pipeline.dataset_name
            )
            assert event["properties"]["default_schema_name_hash"] == digest128(
                load_info.pipeline.default_schema_name
            )
            assert isinstance(event["properties"]["elapsed"], float)
            assert isinstance(event["properties"]["transaction_id"], str)
            # check extract info
            if step == "extract":
                assert event["properties"]["extract_data"] == [{"name": "", "data_type": "int"}]
            if step == "load":
                # dummy has empty fingerprint
                assert event["properties"]["destination_fingerprint"] == ""
        #
        # we have two failed files (state and data) that should be logged by sentry
        # print(SENTRY_SENT_ITEMS)
        # for item in SENTRY_SENT_ITEMS:
        #     # print(item)
        #     print(item["logentry"]["message"])
        # assert len(SENTRY_SENT_ITEMS) == 4

        # trace with exception
        @dlt.resource
        def data():
            raise NotImplementedError()
            yield

        ANON_TRACKER_SENT_ITEMS.clear()
        SENTRY_SENT_ITEMS.clear()
        with pytest.raises(PipelineStepFailed):
            dlt.pipeline().run(data, destination="dummy")
        assert len(ANON_TRACKER_SENT_ITEMS) == 2
        event = ANON_TRACKER_SENT_ITEMS[0]
        assert event["event"] == "pipeline_extract"
        assert event["properties"]["success"] is False
        assert event["properties"]["destination_name"] == "dummy"
        assert event["properties"]["destination_type"] == "dlt.destinations.dummy"
        assert isinstance(event["properties"]["elapsed"], float)
        # check extract info
        if step == "extract":
            assert event["properties"]["extract_data"] == [
                {"name": "data", "data_type": "resource"}
            ]
        # we didn't log any errors
        assert len(SENTRY_SENT_ITEMS) == 0

        # trace without destination and dataset
        p = dlt.pipeline(pipeline_name="fresh").drop()
        ANON_TRACKER_SENT_ITEMS.clear()
        SENTRY_SENT_ITEMS.clear()
        p.extract([1, 2, 3], table_name="data")
        event = ANON_TRACKER_SENT_ITEMS[0]
        assert event["event"] == "pipeline_extract"
        assert event["properties"]["success"] is True
        assert event["properties"]["destination_name"] is None
        assert event["properties"]["destination_type"] is None
        assert event["properties"]["pipeline_name_hash"] == digest128("fresh")
        assert event["properties"]["dataset_name_hash"] is None
        assert event["properties"]["default_schema_name_hash"] == digest128(p.default_schema_name)

        # trace with dataset name
        p = dlt.pipeline(pipeline_name="fresh", dataset_name="fresh_dataset").drop()
        ANON_TRACKER_SENT_ITEMS.clear()
        SENTRY_SENT_ITEMS.clear()
        p.extract([1, 2, 3], table_name="data")
        event = ANON_TRACKER_SENT_ITEMS[0]
        assert event["properties"]["dataset_name_hash"] == digest128(p.dataset_name)


def test_extract_data_describe() -> None:
    schema = Schema("test")
    assert describe_extract_data(DltSource(schema, "sect")) == [
        {"name": "test", "data_type": "source"}
    ]
    assert describe_extract_data(DltResource(Pipe("rrr_extract"), None, False)) == [
        {"name": "rrr_extract", "data_type": "resource"}
    ]
    assert describe_extract_data([DltSource(schema, "sect")]) == [
        {"name": "test", "data_type": "source"}
    ]
    assert describe_extract_data([DltResource(Pipe("rrr_extract"), None, False)]) == [
        {"name": "rrr_extract", "data_type": "resource"}
    ]
    assert describe_extract_data(
        [DltResource(Pipe("rrr_extract"), None, False), DltSource(schema, "sect")]
    ) == [{"name": "rrr_extract", "data_type": "resource"}, {"name": "test", "data_type": "source"}]
    assert describe_extract_data([{"a": "b"}]) == [{"name": "", "data_type": "dict"}]

    from pandas import DataFrame

    # we assume that List content has same type
    assert describe_extract_data([DataFrame(), {"a": "b"}]) == [
        {"name": "", "data_type": "DataFrame"}
    ]
    # first unnamed element in the list breaks checking info
    assert describe_extract_data(
        [DltResource(Pipe("rrr_extract"), None, False), DataFrame(), DltSource(schema, "sect")]
    ) == [{"name": "rrr_extract", "data_type": "resource"}, {"name": "", "data_type": "DataFrame"}]


def test_slack_hook(environment: DictStrStr) -> None:
    stop_telemetry()
    hook_url = "https://hooks.slack.com/services/T04DHMAF13Q/B04E7B1MQ1H/TDHEI123WUEE"
    environment["COMPLETED_PROB"] = "1.0"
    environment["GITHUB_USER"] = "rudolfix"
    environment["RUNTIME__DLTHUB_TELEMETRY"] = "False"
    environment["RUNTIME__SLACK_INCOMING_HOOK"] = hook_url

    with requests_mock.mock() as m:
        m.post(hook_url, json={})
        load_info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy")
        assert slack_notify_load_success(load_info.pipeline.runtime_config.slack_incoming_hook, load_info, load_info.pipeline.last_trace) == 200  # type: ignore[attr-defined]
    assert m.called
    message = m.last_request.json()
    assert "rudolfix" in message["text"]
    assert "dummy" in message["text"]


def test_broken_slack_hook(environment: DictStrStr) -> None:
    environment["COMPLETED_PROB"] = "1.0"
    environment["RUNTIME__SLACK_INCOMING_HOOK"] = "http://localhost:22"
    load_info = dlt.pipeline().run([1, 2, 3], table_name="data", destination="dummy")
    # connection error
    assert slack_notify_load_success(load_info.pipeline.runtime_config.slack_incoming_hook, load_info, load_info.pipeline.last_trace) == -1  # type: ignore[attr-defined]
    # pipeline = dlt.pipeline()
    # assert pipeline.last_trace is not None
    # assert pipeline._trace is None
    # trace = load_trace(info.pipeline.working_dir)
    # assert len(trace.steps) == 4
    # run_step = trace.steps[-1]
    # assert run_step.step == "run"
    # assert run_step.step_exception is None


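# Returns the first resolved config value with matching key and sections, or None.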
def _find_resolved_value(
    resolved: List[SerializableResolvedValueTrace], key: str, sections: List[str]
) -> SerializableResolvedValueTrace:
    return next((v for v in resolved if v.key == key and v.sections == sections), None)


ANON_TRACKER_SENT_ITEMS = []


def _mock_anon_tracker_before_send(event: DictStrAny) -> DictStrAny:
    ANON_TRACKER_SENT_ITEMS.append(event)
    return event


SENTRY_SENT_ITEMS = []


def _mock_sentry_before_send(event: DictStrAny, _unused_hint: Any = None) -> DictStrAny:
    SENTRY_SENT_ITEMS.append(event)
    return event


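# Verifies that a trace can be rendered as str, converted to a dict repeatedly without
# mutating it, dumped to JSON and loaded into an in-memory duckdb pipeline.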
def assert_trace_serializable(trace: PipelineTrace) -> None:
    str(trace)
    trace.asstr(0)
    trace.asstr(1)
    trace_dict = deepcopy(trace.asdict())
    # check if we do not pop
    assert trace_dict == trace.asdict()
    with io.BytesIO() as b:
        json.typed_dump(trace, b, pretty=True)
        b.getvalue()
    json.dumps(trace)

    # load trace to duckdb
    from dlt.destinations import duckdb

    trace_pipeline = dlt.pipeline("trace", destination=duckdb(":pipeline:")).drop()
    trace_pipeline.run([trace], table_name="trace_data")
    # print(trace_pipeline.default_schema.to_pretty_yaml())