Files
dlt/tests/workspace/test_workspace_context.py
rudolfix 06bc05848b (chore) adds hub extra (#3428)
* adds hub extra

* makes hub module more user friendly when hub not installed

* test and lint fixes

* adds plugin version check util function

* adds dlt-runtime to hub extra, minimal import tests

* bumps to dlthub 0.20.0 alpha

* lists pipelines with cli using the same functions as dashboard, dlt pipeline will list pipelines by default

* adds configured profiles method on context so only profiles with configs or pipelines are listed

* adds list of locations that contained actual configs to provider interface

* improves workspace and profile commands

* test fixes

* fixes tests
2025-12-05 16:15:19 +01:00

273 lines
11 KiB
Python

import os
import pytest
import pickle
import tempfile
import dlt
from dlt._workspace._workspace_context import WorkspaceRunContext, switch_context
from dlt._workspace.cli.utils import check_delete_local_data, delete_local_data
from dlt._workspace.exceptions import WorkspaceRunContextNotAvailable
from dlt._workspace.helpers.runtime.runtime_artifacts import sync_from_runtime
from dlt._workspace.profile import DEFAULT_PROFILE, read_profile_pin, save_profile_pin
from dlt._workspace.run_context import (
DEFAULT_LOCAL_FOLDER,
DEFAULT_WORKSPACE_WORKING_FOLDER,
switch_profile,
)
from dlt._workspace.cli.echo import always_choose
from dlt.common.runtime.exceptions import RunContextNotAvailable
from dlt.common.runtime.run_context import DOT_DLT, RunContext, global_dir
from dlt.common.storages.file_storage import FileStorage
from tests.pipeline.utils import assert_table_counts
from tests.utils import clean_test_storage
from tests.workspace.utils import isolated_workspace
def test_legacy_workspace() -> None:
    """Without the workspace feature enabled, only a plain RunContext is created."""
    with isolated_workspace("legacy", required=None) as run_ctx:
        # a legacy context is a bare RunContext, not a WorkspaceRunContext
        assert isinstance(run_ctx, RunContext)
        # asking for the active workspace must fail in legacy mode
        with pytest.raises(WorkspaceRunContextNotAvailable):
            dlt.current.workspace()
def test_require_workspace_context() -> None:
    """Requiring a WorkspaceRunContext in a legacy setup raises on context entry."""
    # the exception is raised while entering the isolated workspace, so the
    # raises-guard and the workspace manager can share a single with statement
    with pytest.raises(RunContextNotAvailable), isolated_workspace(
        "legacy", required="WorkspaceRunContext"
    ):
        pass
def test_workspace_settings() -> None:
    """Default workspace: context shape, dev config layering and profile bookkeeping."""
    with isolated_workspace("default") as workspace:
        assert_workspace_context(workspace, "default", DEFAULT_PROFILE)
        assert_dev_config()
        # only the default (dev) profile is configured
        assert workspace.configured_profiles() == [DEFAULT_PROFILE]
        # the default profile has config files, an unknown profile has none
        assert workspace._profile_has_config(DEFAULT_PROFILE) is True
        assert workspace._profile_has_config("unknown") is False
        # nothing ran yet, so there are no pipelines for the default profile
        assert workspace._profile_has_pipelines(DEFAULT_PROFILE) is False
def test_workspace_profile() -> None:
    """Creating a workspace with an explicit profile scopes the toml providers to
    that profile; switching profiles re-resolves providers and the global dir."""
    with isolated_workspace("default", profile="prod") as ctx:
        assert_workspace_context(ctx, "default", "prod")
        # mocked global dir (set up by the isolated_workspace fixture)
        assert ctx.global_dir.endswith(".global_dir")
        assert set(ctx.configured_profiles()) == {"dev", "prod"}
        # prod has no profile-specific config files, dev does
        assert ctx._profile_has_config("prod") is False
        assert ctx._profile_has_config("dev") is True
        # files for dev profile will be ignored while prod is active
        assert dlt.config["config_val"] == "config.toml"
        assert dlt.config["config_val_ovr"] == "config.toml"
        assert dlt.config.get("config_val_dev") is None
        assert dlt.secrets["secrets_val"] == "secrets.toml"
        assert dlt.secrets["secrets_val_ovr"] == "secrets.toml"
        assert dlt.secrets.get("secrets_val_dev") is None
        # switch profile: returns a new context which also becomes the active one
        ctx = ctx.switch_profile("dev")
        assert ctx.profile == "dev"
        ctx = dlt.current.workspace()
        assert ctx.profile == "dev"
        assert_workspace_context(ctx, "default", "dev")
        # standard global dir
        assert ctx.global_dir == global_dir()
        assert_dev_config()
        assert ctx.configured_profiles() == ["dev"]
        assert ctx._profile_has_config("dev") is True
def test_profile_switch_no_workspace() -> None:
    """switch_profile requires an active workspace context and raises otherwise."""
    # -> None annotation added for consistency with the other tests in this module
    with isolated_workspace("legacy", required=None):
        # in a legacy (non-workspace) context, profile switching is not available
        with pytest.raises(RunContextNotAvailable):
            switch_profile("dev")
def test_workspace_configuration() -> None:
    """Workspace name and dirs can be overridden via workspace configuration, per profile."""
    # -> None annotation added for consistency with the other tests in this module
    with isolated_workspace("configured_workspace", profile="tests") as ctx:
        # the configured component name is used for logging / runtime config
        assert ctx.runtime_config.pipeline_name == "component"
        assert ctx.name == "name_override"
        # check dirs for tests profile
        assert ctx.data_dir == os.path.join(ctx.run_dir, "_data")
        assert ctx.local_dir.endswith(os.path.join("_local", "tests"))
        ctx = ctx.switch_profile("dev")
        assert ctx.name == "name_override"
        assert ctx.data_dir == os.path.join(ctx.run_dir, "_data")
        # this is the OSS compat mode where local dir is the same as run dir
        assert ctx.local_dir == os.path.join(ctx.run_dir, ".")
def test_pinned_profile() -> None:
    """A pinned profile becomes the default profile when the context is re-created."""
    with isolated_workspace("default") as ctx:
        assert ctx.profile == "dev"
        assert ctx.configured_profiles() == ["dev"]
        # we pin prod
        save_profile_pin(ctx, "prod")
        assert read_profile_pin(ctx) == "prod"
        # prod is configured profile now (presumably because the pin file itself
        # counts as configuration — confirm against configured_profiles impl)
        assert set(ctx.configured_profiles()) == {"prod", "dev"}
        # because it is pinned, we still do not see it as special config
        assert ctx._profile_has_config("prod") is False
        # re-entering the workspace picks the pinned profile as the new default
        ctx = switch_context(ctx.run_dir)
        assert ctx.profile == "prod"
        ctx = dlt.current.workspace()
        assert ctx.profile == "prod"
        assert_workspace_context(ctx, "default", "prod")
def test_dev_env_overwrite() -> None:
    """Placeholder: overriding dev profile settings via environment variables.

    Skipped explicitly so the empty body does not show up as a (falsely) passing test.
    """
    pytest.skip("not implemented: dev env overwrite behavior is not covered yet")
def test_workspace_pipeline() -> None:
    """End-to-end pipeline run in a workspace: per-profile local/working files,
    wiping of local data and pipeline re-creation after a profile switch."""
    pytest.importorskip("duckdb", minversion="1.3.2")
    with isolated_workspace("pipelines", profile="tests") as ctx:
        # prod and tests have explicit config for profiles
        assert set(ctx.configured_profiles()) == {"tests", "prod"}
        assert ctx._profile_has_config("tests") is True
        assert ctx._profile_has_config("prod") is True
        assert ctx._profile_has_pipelines("tests") is False
        assert ctx._profile_has_pipelines("prod") is False
        # `ducklake_pipeline` configured in config.toml
        pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
        assert pipeline.run_context is ctx
        assert pipeline.dataset_name == "lake_data"
        assert pipeline.destination.destination_name == "ducklake"
        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})
        # make sure that local files (catalog db + data files dir) got created
        assert os.path.isfile(os.path.join(ctx.local_dir, "test_ducklake.sqlite"))
        assert os.path.isdir(os.path.join(ctx.local_dir, "test_ducklake.files"))
        # make sure that working folder got created
        assert os.path.isdir(os.path.join(ctx.get_data_entity("pipelines"), "ducklake_pipeline"))
        assert ctx._profile_has_pipelines("tests") is True
        # test wipe function, answering "yes" to all interactive prompts
        with always_choose(always_choose_default=False, always_choose_value=True):
            delete_local_data(ctx, check_delete_local_data(ctx, skip_data_dir=False))
        # must recreate pipeline after its working dir was wiped
        pipeline = pipeline.drop()
        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})
        # switch to prod
        ctx = ctx.switch_profile("prod")
        # must re-create pipeline if context changed
        pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})
        # local files point to prod
        assert os.path.isfile(os.path.join(ctx.local_dir, "prod_ducklake.sqlite"))
        assert os.path.isdir(os.path.join(ctx.local_dir, "prod_ducklake.files"))
        # both profiles have pipelines
        assert ctx._profile_has_pipelines("tests") is True
        assert ctx._profile_has_pipelines("prod") is True
        assert set(ctx.configured_profiles()) == {"prod", "tests"}
        # switch to dev
        ctx = ctx.switch_profile("dev")
        assert set(ctx.configured_profiles()) == {"dev", "prod", "tests"}
def test_workspace_send_artifacts() -> None:
    """Pipeline artifacts sent to a runtime bucket can be restored into a fresh
    workspace via sync_from_runtime."""
    pytest.importorskip("duckdb", minversion="1.3.2")
    # create a random temp directory for the test bucket
    with tempfile.TemporaryDirectory() as temp_bucket_dir:
        bucket_base = os.path.join(temp_bucket_dir, "local_bucket", "workspace_id")
        send_bucket_url = os.path.join(bucket_base, "tests", "pipelines")
        # mock run id to enable artifact storage
        os.environ["RUNTIME__RUN_ID"] = "uniq_run_id"
        # emit runtime filesystem info
        os.environ["SEND__ARTIFACTS__BUCKET_URL"] = send_bucket_url
        # auto create dirs
        os.environ["ARTIFACTS__KWARGS"] = '{"auto_mkdir": true}'
        # NOTE(review): the env vars above are not restored in this test —
        # presumably a test fixture resets os.environ between tests; confirm
        with isolated_workspace("pipelines", profile="tests") as ctx:
            # `ducklake_pipeline` configured in config.toml
            pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
            pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
            print(ctx.run_dir)
        # delete the whole workspace
        clean_test_storage()
        with isolated_workspace("pipelines", profile="tests") as ctx:
            # now restore pipeline from bucket
            os.environ["SYNC__ARTIFACTS__BUCKET_URL"] = bucket_base
            sync_from_runtime()
            # now pipeline sees restored state
            pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
            assert pipeline.first_run is False
            assert pipeline.default_schema_name == "ducklake"
            assert pipeline.default_schema.tables["table_foo"] is not None
            assert pipeline.last_trace is not None
def assert_dev_config() -> None:
    """Assert that the profile toml providers layer dev.* files over the base files."""
    # values set only in the base file resolve from it; values also present in
    # the dev profile file are overridden by it
    expected_config = {
        "config_val": "config.toml",
        "config_val_ovr": "dev.config.toml",
        "config_val_dev": "dev.config.toml",
    }
    for key, expected in expected_config.items():
        assert dlt.config[key] == expected
    expected_secrets = {
        "secrets_val": "secrets.toml",
        "secrets_val_ovr": "dev.secrets.toml",
        "secrets_val_dev": "dev.secrets.toml",
    }
    for key, expected in expected_secrets.items():
        assert dlt.secrets[key] == expected
def assert_workspace_context(context: WorkspaceRunContext, name_prefix: str, profile: str) -> None:
    """Verify the invariants of a workspace context: naming, directory layout,
    entity/settings path resolution and picklability."""
    # basic properties must be set
    assert context.name.startswith(name_prefix)
    assert context.profile == profile
    assert context.default_profile == "dev"
    assert context.profile in context.configured_profiles()
    settings_path = os.path.join(context.run_dir, DOT_DLT)
    assert context.settings_dir == settings_path
    # data dir layout: <settings>/<working folder>/<profile>, created eagerly
    data_path = os.path.join(settings_path, DEFAULT_WORKSPACE_WORKING_FOLDER, profile)
    assert context.data_dir == data_path
    assert os.path.isdir(context.data_dir)
    # is a default dir
    assert context._has_default_working_dir() is True
    # local dir layout: <run_dir>/<local folder>/<profile>, created eagerly
    local_path = os.path.join(context.run_dir, DEFAULT_LOCAL_FOLDER, profile)
    assert context.local_dir == local_path
    assert os.path.isdir(context.local_dir)
    # data entities live under the data dir; run entities get no special folders
    assert context.get_data_entity("pipelines") == os.path.join(data_path, "pipelines")
    assert context.get_run_entity("sources") == context.run_dir
    # settings resolve inside the settings dir
    assert context.get_setting("config.toml") == os.path.join(settings_path, "config.toml")
    # the context must survive a pickle round trip with its configs intact
    restored = pickle.loads(pickle.dumps(context))
    assert dict(context.runtime_config) == dict(restored.runtime_config)
    assert dict(context.config) == dict(restored.config)