import os
import pickle
import tempfile

import pytest

import dlt
from dlt._workspace._workspace_context import WorkspaceRunContext, switch_context
from dlt._workspace.cli.echo import always_choose
from dlt._workspace.cli.utils import check_delete_local_data, delete_local_data
from dlt._workspace.exceptions import WorkspaceRunContextNotAvailable
from dlt._workspace.helpers.runtime.runtime_artifacts import sync_from_runtime
from dlt._workspace.profile import DEFAULT_PROFILE, read_profile_pin, save_profile_pin
from dlt._workspace.run_context import (
    DEFAULT_LOCAL_FOLDER,
    DEFAULT_WORKSPACE_WORKING_FOLDER,
    switch_profile,
)
from dlt.common.runtime.exceptions import RunContextNotAvailable
from dlt.common.runtime.run_context import DOT_DLT, RunContext, global_dir
from dlt.common.storages.file_storage import FileStorage

from tests.pipeline.utils import assert_table_counts
from tests.utils import clean_test_storage
from tests.workspace.utils import isolated_workspace


def test_legacy_workspace() -> None:
    # do not create a workspace context without the feature flag
    with isolated_workspace("legacy", required=None) as ctx:
        assert isinstance(ctx, RunContext)
        # getting the active workspace must fail
        with pytest.raises(WorkspaceRunContextNotAvailable):
            dlt.current.workspace()
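

# Illustrative sketch, not part of the suite: user code can apply the same
# guard that `test_legacy_workspace` exercises. Everything used here is
# already imported above; only the helper name `_workspace_or_none` is
# hypothetical.
def _workspace_or_none():
    try:
        return dlt.current.workspace()
    except WorkspaceRunContextNotAvailable:
        # running in a legacy run context: no workspace available
        return None

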
def test_require_workspace_context() -> None:
    with pytest.raises(RunContextNotAvailable):
        with isolated_workspace("legacy", required="WorkspaceRunContext"):
            pass


def test_workspace_settings() -> None:
    with isolated_workspace("default") as ctx:
        assert_workspace_context(ctx, "default", DEFAULT_PROFILE)
        assert_dev_config()
        assert ctx.configured_profiles() == [DEFAULT_PROFILE]
        # has dev config
        assert ctx._profile_has_config(DEFAULT_PROFILE) is True
        assert ctx._profile_has_config("unknown") is False
        # no pipelines yet
        assert ctx._profile_has_pipelines(DEFAULT_PROFILE) is False


def test_workspace_profile() -> None:
    with isolated_workspace("default", profile="prod") as ctx:
        assert_workspace_context(ctx, "default", "prod")
        # mocked global dir
        assert ctx.global_dir.endswith(".global_dir")
        assert set(ctx.configured_profiles()) == {"dev", "prod"}
        assert ctx._profile_has_config("prod") is False
        assert ctx._profile_has_config("dev") is True

        # files for the dev profile will be ignored
        assert dlt.config["config_val"] == "config.toml"
        assert dlt.config["config_val_ovr"] == "config.toml"
        assert dlt.config.get("config_val_dev") is None

        assert dlt.secrets["secrets_val"] == "secrets.toml"
        assert dlt.secrets["secrets_val_ovr"] == "secrets.toml"
        assert dlt.secrets.get("secrets_val_dev") is None

        # switch profile
        ctx = ctx.switch_profile("dev")
        assert ctx.profile == "dev"
        ctx = dlt.current.workspace()
        assert ctx.profile == "dev"
        assert_workspace_context(ctx, "default", "dev")
        # standard global dir
        assert ctx.global_dir == global_dir()
        assert_dev_config()
        assert ctx.configured_profiles() == ["dev"]
        assert ctx._profile_has_config("dev") is True
def test_profile_switch_no_workspace() -> None:
    with isolated_workspace("legacy", required=None):
        with pytest.raises(RunContextNotAvailable):
            switch_profile("dev")


def test_workspace_configuration() -> None:
    with isolated_workspace("configured_workspace", profile="tests") as ctx:
        # the pipeline name is used as the component name for logging
        assert ctx.runtime_config.pipeline_name == "component"
        assert ctx.name == "name_override"
        # check dirs for the tests profile
        assert ctx.data_dir == os.path.join(ctx.run_dir, "_data")
        assert ctx.local_dir.endswith(os.path.join("_local", "tests"))

        ctx = ctx.switch_profile("dev")
        assert ctx.name == "name_override"
        assert ctx.data_dir == os.path.join(ctx.run_dir, "_data")
        # OSS compat mode: the local dir is the same as the run dir
        assert ctx.local_dir == os.path.join(ctx.run_dir, ".")


def test_pinned_profile() -> None:
    with isolated_workspace("default") as ctx:
        assert ctx.profile == "dev"
        assert ctx.configured_profiles() == ["dev"]
        # pin the prod profile
        save_profile_pin(ctx, "prod")
        assert read_profile_pin(ctx) == "prod"
        # prod is a configured profile now
        assert set(ctx.configured_profiles()) == {"prod", "dev"}
        # the pin alone does not count as profile config
        assert ctx._profile_has_config("prod") is False

        # the pinned profile is the new default
        ctx = switch_context(ctx.run_dir)
        assert ctx.profile == "prod"
        ctx = dlt.current.workspace()
        assert ctx.profile == "prod"
        assert_workspace_context(ctx, "default", "prod")


def test_dev_env_overwrite() -> None:
    pass
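

# The stub above is not implemented yet. A minimal sketch of what it might
# assert, assuming dlt's environment provider (uppercased keys) takes
# precedence over the profile toml files; the key and value are hypothetical:
#
# def test_dev_env_overwrite() -> None:
#     with isolated_workspace("default"):
#         os.environ["CONFIG_VAL_OVR"] = "environ"
#         assert dlt.config["config_val_ovr"] == "environ"

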
def test_workspace_pipeline() -> None:
    pytest.importorskip("duckdb", minversion="1.3.2")

    with isolated_workspace("pipelines", profile="tests") as ctx:
        # the prod and tests profiles have explicit config files
        assert set(ctx.configured_profiles()) == {"tests", "prod"}
        assert ctx._profile_has_config("tests") is True
        assert ctx._profile_has_config("prod") is True
        assert ctx._profile_has_pipelines("tests") is False
        assert ctx._profile_has_pipelines("prod") is False

        # `ducklake_pipeline` is configured in config.toml
        pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
        assert pipeline.run_context is ctx
        assert pipeline.dataset_name == "lake_data"
        assert pipeline.destination.destination_name == "ducklake"

        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})
        # make sure that local files got created
        assert os.path.isfile(os.path.join(ctx.local_dir, "test_ducklake.sqlite"))
        assert os.path.isdir(os.path.join(ctx.local_dir, "test_ducklake.files"))
        # make sure that the pipeline working folder got created
        assert os.path.isdir(os.path.join(ctx.get_data_entity("pipelines"), "ducklake_pipeline"))
        assert ctx._profile_has_pipelines("tests") is True

        # test the wipe function
        with always_choose(always_choose_default=False, always_choose_value=True):
            delete_local_data(ctx, check_delete_local_data(ctx, skip_data_dir=False))
        # the pipeline must be recreated after the wipe
        pipeline = pipeline.drop()
        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})

        # switch to prod
        ctx = ctx.switch_profile("prod")
        # the pipeline must be re-created when the context changes
        pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
        load_info = pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")
        print(load_info)
        assert_table_counts(pipeline, {"table_foo": 2})
        # local files point to prod
        assert os.path.isfile(os.path.join(ctx.local_dir, "prod_ducklake.sqlite"))
        assert os.path.isdir(os.path.join(ctx.local_dir, "prod_ducklake.files"))
        # both profiles have pipelines now
        assert ctx._profile_has_pipelines("tests") is True
        assert ctx._profile_has_pipelines("prod") is True
        assert set(ctx.configured_profiles()) == {"prod", "tests"}

        # switch to dev
        ctx = ctx.switch_profile("dev")
        assert set(ctx.configured_profiles()) == {"dev", "prod", "tests"}
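

# The wipe-and-recreate contract from `test_workspace_pipeline`, gathered into
# one helper for readability. Illustrative only and not used by the tests; it
# reuses exactly the calls above: compute what would be deleted, delete it
# without prompting, then drop the pipeline so its working state is rebuilt.
def _wipe_and_rerun(ctx: WorkspaceRunContext, pipeline: dlt.Pipeline):
    with always_choose(always_choose_default=False, always_choose_value=True):
        delete_local_data(ctx, check_delete_local_data(ctx, skip_data_dir=False))
    pipeline = pipeline.drop()
    return pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")

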
def test_workspace_send_artifacts() -> None:
    pytest.importorskip("duckdb", minversion="1.3.2")

    # create a random temp directory for the test bucket
    with tempfile.TemporaryDirectory() as temp_bucket_dir:
        bucket_base = os.path.join(temp_bucket_dir, "local_bucket", "workspace_id")
        send_bucket_url = os.path.join(bucket_base, "tests", "pipelines")

        # mock the run id to enable artifact storage
        os.environ["RUNTIME__RUN_ID"] = "uniq_run_id"
        # bucket url to which artifacts are sent
        os.environ["SEND__ARTIFACTS__BUCKET_URL"] = send_bucket_url
        # auto create dirs
        os.environ["ARTIFACTS__KWARGS"] = '{"auto_mkdir": true}'

        with isolated_workspace("pipelines", profile="tests") as ctx:
            # `ducklake_pipeline` is configured in config.toml
            pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
            pipeline.run([{"foo": 1}, {"foo": 2}], table_name="table_foo")

            print(ctx.run_dir)

        # delete the whole workspace
        clean_test_storage()

        with isolated_workspace("pipelines", profile="tests") as ctx:
            # now restore the pipeline from the bucket
            os.environ["SYNC__ARTIFACTS__BUCKET_URL"] = bucket_base
            sync_from_runtime()
            # the pipeline now sees the restored state
            pipeline = dlt.pipeline(pipeline_name="ducklake_pipeline")
            assert pipeline.first_run is False
            assert pipeline.default_schema_name == "ducklake"
            assert pipeline.default_schema.tables["table_foo"] is not None
            assert pipeline.last_trace is not None
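

# The artifact round trip above is driven entirely by environment variables.
# A compact helper (illustrative only; the function name is hypothetical)
# makes the contract used by this test explicit:
def _configure_artifact_env(run_id: str, send_url: str, sync_url: str) -> None:
    os.environ["RUNTIME__RUN_ID"] = run_id  # a run id enables artifact storage
    os.environ["SEND__ARTIFACTS__BUCKET_URL"] = send_url  # where artifacts are written
    os.environ["SYNC__ARTIFACTS__BUCKET_URL"] = sync_url  # where sync_from_runtime() restores from
    os.environ["ARTIFACTS__KWARGS"] = '{"auto_mkdir": true}'  # filesystem kwargs: create dirs

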
def assert_dev_config() -> None:
    # check profile toml providers
    assert dlt.config["config_val"] == "config.toml"
    assert dlt.config["config_val_ovr"] == "dev.config.toml"
    assert dlt.config["config_val_dev"] == "dev.config.toml"

    assert dlt.secrets["secrets_val"] == "secrets.toml"
    assert dlt.secrets["secrets_val_ovr"] == "dev.secrets.toml"
    assert dlt.secrets["secrets_val_dev"] == "dev.secrets.toml"


def assert_workspace_context(context: WorkspaceRunContext, name_prefix: str, profile: str) -> None:
    # basic properties must be set
    assert context.name.startswith(name_prefix)
    assert context.profile == profile
    assert context.default_profile == "dev"
    assert context.profile in context.configured_profiles()

    expected_settings = os.path.join(context.run_dir, DOT_DLT)
    assert context.settings_dir == expected_settings

    # working dir layout: <settings_dir>/<working folder>/<profile>
    expected_data_dir = os.path.join(
        context.settings_dir, DEFAULT_WORKSPACE_WORKING_FOLDER, profile
    )
    assert context.data_dir == expected_data_dir
    # got created
    assert os.path.isdir(context.data_dir)
    # is the default working dir
    assert context._has_default_working_dir() is True

    # local files layout: <run_dir>/<local folder>/<profile>
    expected_local_dir = os.path.join(context.run_dir, DEFAULT_LOCAL_FOLDER, profile)
    assert context.local_dir == expected_local_dir
    # got created
    assert os.path.isdir(context.local_dir)

    # test entity paths
    assert context.get_data_entity("pipelines") == os.path.join(expected_data_dir, "pipelines")
    # no special folders for run entities
    assert context.get_run_entity("sources") == context.run_dir
    # settings live in the settings dir
    assert context.get_setting("config.toml") == os.path.join(expected_settings, "config.toml")

    # check that the context can be pickled and restored
    pickled_ = pickle.dumps(context)
    run_context_unpickled = pickle.loads(pickled_)
    assert dict(context.runtime_config) == dict(run_context_unpickled.runtime_config)
    assert dict(context.config) == dict(run_context_unpickled.config)