import os
import dataclasses
import logging
from typing import Dict, List, Any

# patch which providers to enable
from dlt.common.configuration.providers import (
    ConfigProvider,
    EnvironProvider,
    SecretsTomlProvider,
    ConfigTomlProvider,
    GoogleSecretsProvider,
)
from dlt.common.configuration.specs.config_providers_context import (
    ConfigProvidersConfiguration,
)
from dlt.common.runtime.run_context import RunContext


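# replace RunContext.initial_providers so tests read secrets/config from tests/.dlt
# instead of the user's global .dlt folder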
def initial_providers(self) -> List[ConfigProvider]:
    # do not read the global config
    return [
        EnvironProvider(),
        SecretsTomlProvider(settings_dir="tests/.dlt"),
        ConfigTomlProvider(settings_dir="tests/.dlt"),
    ]


RunContext.initial_providers = initial_providers  # type: ignore[method-assign]
# also disable extras
ConfigProvidersConfiguration.enable_airflow_secrets = False
ConfigProvidersConfiguration.enable_google_secrets = False

CACHED_GOOGLE_SECRETS: Dict[str, Any] = {}


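# cache vault lookups for the whole test session: GoogleSecretsProvider calls out to
# Google Secret Manager, and caching (presumably) keeps repeated lookups from slowing
# tests down or hitting API quotas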
class CachedGoogleSecretsProvider(GoogleSecretsProvider):
    def _look_vault(self, full_key, hint):
        if full_key not in CACHED_GOOGLE_SECRETS:
            CACHED_GOOGLE_SECRETS[full_key] = super()._look_vault(full_key, hint)
        return CACHED_GOOGLE_SECRETS[full_key]

    def _list_vault(self):
        key_ = "__list_vault"
        if key_ not in CACHED_GOOGLE_SECRETS:
            CACHED_GOOGLE_SECRETS[key_] = super()._list_vault()
        return CACHED_GOOGLE_SECRETS[key_]


from dlt.common.configuration.providers import google_secrets

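# swap the provider class at module level so code that imports it from google_secrets
# instantiates the cached subclass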
google_secrets.GoogleSecretsProvider = CachedGoogleSecretsProvider  # type: ignore[misc]


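# pytest_configure is a standard pytest hook; it runs once at startup, before any tests
# are collected, so all patches below are in place for the whole session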
def pytest_configure(config):
    # patch the configurations to use test storage by default: we modify the fields on the
    # types (classes) themselves, and the dataclass implementation picks up the patched
    # values when creating instances (the values in the declaration are not frozen, which
    # allows patching). this is needed by the common storage tests

    from dlt.common.configuration.specs import runtime_configuration
    from dlt.common.storages import configuration as storage_configuration

    test_storage_root = "_storage"
    runtime_configuration.RuntimeConfiguration.config_files_storage_path = os.path.join(
        test_storage_root, "config/"
    )
    # always use CI track endpoint when running tests
    runtime_configuration.RuntimeConfiguration.dlthub_telemetry_endpoint = (
        "https://telemetry-tracker.services4758.workers.dev"
    )

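    # the trick used below: these configurations are dataclasses whose class attributes
    # double as field defaults. after overwriting a default we drop the generated __init__
    # and re-run dataclasses.dataclass() on the class, which regenerates __init__ with the
    # patched value. a minimal sketch of the pattern (hypothetical class, not from dlt):
    #
    #   @dataclasses.dataclass
    #   class Cfg:
    #       path: str = "prod"
    #
    #   Cfg.path = "test"
    #   delattr(Cfg, "__init__")  # dataclass() only generates a __init__ that is missing
    #   Cfg = dataclasses.dataclass(Cfg, init=True)
    #   assert Cfg().path == "test"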
    delattr(runtime_configuration.RuntimeConfiguration, "__init__")
    runtime_configuration.RuntimeConfiguration = dataclasses.dataclass(  # type: ignore[misc]
        runtime_configuration.RuntimeConfiguration, init=True, repr=False
    )  # type: ignore

    storage_configuration.LoadStorageConfiguration.load_volume_path = os.path.join(
        test_storage_root, "load"
    )
    delattr(storage_configuration.LoadStorageConfiguration, "__init__")
    storage_configuration.LoadStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.LoadStorageConfiguration, init=True, repr=False
    )

    storage_configuration.NormalizeStorageConfiguration.normalize_volume_path = os.path.join(
        test_storage_root, "normalize"
    )
    # delete __init__, otherwise it will not be recreated by dataclass
    delattr(storage_configuration.NormalizeStorageConfiguration, "__init__")
    storage_configuration.NormalizeStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.NormalizeStorageConfiguration, init=True, repr=False
    )

    storage_configuration.SchemaStorageConfiguration.schema_volume_path = os.path.join(
        test_storage_root, "schemas"
    )
    delattr(storage_configuration.SchemaStorageConfiguration, "__init__")
    storage_configuration.SchemaStorageConfiguration = dataclasses.dataclass(  # type: ignore[misc,call-overload]
        storage_configuration.SchemaStorageConfiguration, init=True, repr=False
    )

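    # sanity check: both the class-level default and a freshly created instance must see
    # the patched path, proving the regenerated __init__ picked it up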
    assert runtime_configuration.RuntimeConfiguration.config_files_storage_path == os.path.join(
        test_storage_root, "config/"
    )
    assert runtime_configuration.RuntimeConfiguration().config_files_storage_path == os.path.join(
        test_storage_root, "config/"
    )

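    # tests create many pipelines in quick succession; (presumably) millisecond precision
    # in the instance id keeps their working folder names from colliding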
    # patch pipeline instance id up to millisecond
    from dlt.common import pendulum
    from dlt.pipeline.pipeline import Pipeline

    def _create_pipeline_instance_id(self) -> str:
        return pendulum.now().format("_YYYYMMDDhhmmssSSSS")

    Pipeline._create_pipeline_instance_id = _create_pipeline_instance_id  # type: ignore[method-assign]

    # disable sqlfluff logging
    for log in ["sqlfluff.parser", "sqlfluff.linter", "sqlfluff.templater", "sqlfluff.lexer"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable snowflake logging
    for log in ["snowflake.connector.cursor", "snowflake.connector.connection"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable azure logging
    for log in ["azure.core.pipeline.policies.http_logging_policy"]:
        logging.getLogger(log).setLevel("ERROR")

    # disable databricks logging
    for log in ["databricks.sql.client"]:
        logging.getLogger(log).setLevel("WARNING")

    # disable httpx request logging (too verbose when testing qdrant)
    logging.getLogger("httpx").setLevel("WARNING")

    # disable googleapiclient logging
    logging.getLogger("googleapiclient.discovery_cache").setLevel("WARNING")

    # disable pyiceberg logging
    logging.getLogger("pyiceberg").setLevel("WARNING")

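    # the whole airflow section below is best-effort: airflow is an optional test
    # dependency, and the bare `except Exception` (presumably) also swallows the
    # ImportError when it is not installed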
    # reset and init airflow db
    import warnings

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        try:
            from airflow.utils import db
            import contextlib
            import io

            for log in [
                "airflow.models.crypto",
                "airflow.models.variable",
                "airflow",
                "alembic",
                "alembic.runtime.migration",
            ]:
                logging.getLogger(log).setLevel("ERROR")

            with (
                contextlib.redirect_stdout(io.StringIO()),
                contextlib.redirect_stderr(io.StringIO()),
            ):
                db.resetdb()

        except Exception:
            pass