dlt/tests/conftest.py
David Scharf 3ebbfa1f9e migrate to uv (#2766)
* move pyproject.toml and makefile from old branch and add inbetween changes

* update workflow files to use uv

* run new version of formatter

* fix building of images with uv

* possibly fix docs linting

* downgrade lancedb dependency to fix tests

* fix gcs compat mode for s3 for newest boto

* fix docstrings in examples

* add some uv constraints

* update readme.md and contributing.md and some other places

* allow duckdb 0.8 in range

* add link-mode copy to uv venv on windows

* remove poetry lockfile and unneeded lockfile checker

* fix chess api related failures

* sleep after dremio start..

* set correct package in pyproject

* Revert "add some uv constraints"

This reverts commit d611e9ecce.

# Conflicts:
#	pyproject.toml
#	uv.lock

* add missing databricks sql connector version bounds
2025-06-19 10:11:24 +02:00

171 lines
6.2 KiB
Python

import os
import dataclasses
import logging
from typing import Dict, List, Any
# patch which providers to enable
from dlt.common.configuration.providers import (
    ConfigProvider,
    EnvironProvider,
    SecretsTomlProvider,
    ConfigTomlProvider,
    GoogleSecretsProvider,
)
from dlt.common.configuration.specs.config_providers_context import (
    ConfigProvidersConfiguration,
)
from dlt.common.runtime.run_context import RunContext
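

# the function below is patched onto RunContext so tests read config and secrets
# only from tests/.dlt and the environment, never from the global settings dir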
def initial_providers(self) -> List[ConfigProvider]:
    # do not read the global config
    return [
        EnvironProvider(),
        SecretsTomlProvider(settings_dir="tests/.dlt"),
        ConfigTomlProvider(settings_dir="tests/.dlt"),
    ]


RunContext.initial_providers = initial_providers # type: ignore[method-assign]
# also disable extras
ConfigProvidersConfiguration.enable_airflow_secrets = False
ConfigProvidersConfiguration.enable_google_secrets = False
CACHED_GOOGLE_SECRETS: Dict[str, Any] = {}
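

# cache vault lookups for the whole test session so repeated config resolution
# does not hit Google Secret Manager for the same key twice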
class CachedGoogleSecretsProvider(GoogleSecretsProvider):
    def _look_vault(self, full_key, hint):
        if full_key not in CACHED_GOOGLE_SECRETS:
            CACHED_GOOGLE_SECRETS[full_key] = super()._look_vault(full_key, hint)
        return CACHED_GOOGLE_SECRETS[full_key]

    def _list_vault(self):
        key_ = "__list_vault"
        if key_ not in CACHED_GOOGLE_SECRETS:
            CACHED_GOOGLE_SECRETS[key_] = super()._list_vault()
        return CACHED_GOOGLE_SECRETS[key_]


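# install the caching provider in place of the stock GoogleSecretsProvider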
from dlt.common.configuration.providers import google_secrets
google_secrets.GoogleSecretsProvider = CachedGoogleSecretsProvider # type: ignore[misc]


def pytest_configure(config):
    # patch the configurations to use test storage by default: we modify fields on the
    # types (classes) themselves, and the dataclass implementation will use the patched values
    # when creating instances (the declared values are not frozen, which allows patching).
    # this is needed by the common storage tests
    from dlt.common.configuration.specs import runtime_configuration
    from dlt.common.storages import configuration as storage_configuration

    test_storage_root = "_storage"
    runtime_configuration.RuntimeConfiguration.config_files_storage_path = os.path.join(
        test_storage_root, "config/"
    )
    # always use CI track endpoint when running tests
    runtime_configuration.RuntimeConfiguration.dlthub_telemetry_endpoint = (
        "https://telemetry-tracker.services4758.workers.dev"
    )
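
    # drop the generated __init__ so dataclasses.dataclass() below regenerates it
    # and picks up the patched default values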
    delattr(runtime_configuration.RuntimeConfiguration, "__init__")
    runtime_configuration.RuntimeConfiguration = dataclasses.dataclass(  # type: ignore[misc]
        runtime_configuration.RuntimeConfiguration, init=True, repr=False
    )  # type: ignore

    storage_configuration.LoadStorageConfiguration.load_volume_path = os.path.join(
        test_storage_root, "load"
    )
    delattr(storage_configuration.LoadStorageConfiguration, "__init__")
    storage_configuration.LoadStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.LoadStorageConfiguration, init=True, repr=False
    )

    storage_configuration.NormalizeStorageConfiguration.normalize_volume_path = os.path.join(
        test_storage_root, "normalize"
    )
    # delete __init__, otherwise it will not be recreated by dataclass
    delattr(storage_configuration.NormalizeStorageConfiguration, "__init__")
    storage_configuration.NormalizeStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.NormalizeStorageConfiguration, init=True, repr=False
    )

    storage_configuration.SchemaStorageConfiguration.schema_volume_path = os.path.join(
        test_storage_root, "schemas"
    )
    delattr(storage_configuration.SchemaStorageConfiguration, "__init__")
    storage_configuration.SchemaStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.SchemaStorageConfiguration, init=True, repr=False
    )
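
    # sanity check: the patched path is used both by the class and by new instances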
    assert runtime_configuration.RuntimeConfiguration.config_files_storage_path == os.path.join(
        test_storage_root, "config/"
    )
    assert runtime_configuration.RuntimeConfiguration().config_files_storage_path == os.path.join(
        test_storage_root, "config/"
    )

    # patch pipeline instance id up to millisecond
    from dlt.common import pendulum
    from dlt.pipeline.pipeline import Pipeline

    def _create_pipeline_instance_id(self) -> str:
        return pendulum.now().format("_YYYYMMDDhhmmssSSSS")

    Pipeline._create_pipeline_instance_id = _create_pipeline_instance_id  # type: ignore[method-assign]
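    # (millisecond precision presumably avoids id collisions when tests create
    # pipelines back to back)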

    # disable sqlfluff logging
    for log in ["sqlfluff.parser", "sqlfluff.linter", "sqlfluff.templater", "sqlfluff.lexer"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable snowflake logging
    for log in ["snowflake.connector.cursor", "snowflake.connector.connection"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable azure logging
    for log in ["azure.core.pipeline.policies.http_logging_policy"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable databricks logging
    for log in ["databricks.sql.client"]:
        logging.getLogger(log).setLevel("WARNING")

    # disable httpx request logging (too verbose when testing qdrant)
    logging.getLogger("httpx").setLevel("WARNING")

    # disable googleapiclient logging
    logging.getLogger("googleapiclient.discovery_cache").setLevel("WARNING")

    # disable pyiceberg logging
    logging.getLogger("pyiceberg").setLevel("WARNING")

    # reset and init airflow db
    import warnings

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        try:
            from airflow.utils import db
            import contextlib
            import io

            for log in [
                "airflow.models.crypto",
                "airflow.models.variable",
                "airflow",
                "alembic",
                "alembic.runtime.migration",
            ]:
                logging.getLogger(log).setLevel("ERROR")

            with (
                contextlib.redirect_stdout(io.StringIO()),
                contextlib.redirect_stderr(io.StringIO()),
            ):
                db.resetdb()
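        # airflow is an optional test dependency; ignore setup failures when it is missing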
        except Exception:
            pass