Files
dlt/dlt/common/configuration/resolve.py
rudolfix 06bc05848b (chore) adds hub extra (#3428)
* adds hub extra

* makes hub module more user friendly when hub not installed

* test and lint fixes

* adds plugin version check util function

* adds dlt-runtime to hub extra, minimal import tests

* bumps to dlthub 0.20.0 alpha

* lists pipelines with cli using the same functions as dashboard, dlt pipeline will list pipelines by default

* adds configured profiles method on context so only profiles with configs or pipelines are listed

* adds list of locations that contained actual configs to provider interface

* improves workspace and profile commands

* test fixes

* fixes tests
2025-12-05 16:15:19 +01:00

629 lines
27 KiB
Python

import itertools
from collections.abc import Mapping as C_Mapping
import os
from typing import Any, Dict, ContextManager, List, Optional, Sequence, Tuple, Type, TypeVar, Union
from dlt.common import logger
from dlt.common.configuration.providers.provider import (
ConfigProvider,
EXPLICIT_VALUES_PROVIDER_NAME,
)
from dlt.common.configuration.const import TYPE_EXAMPLES
from dlt.common.typing import (
AnyType,
ConfigValueSentinel,
StrAny,
TSecretValue,
get_all_types_of_class_in_union,
is_optional_type,
is_subclass,
is_union_type,
)
from dlt.common.configuration.specs.base_configuration import (
BaseConfiguration,
CredentialsConfiguration,
is_secret_hint,
extract_inner_hint,
is_context_inner_hint,
is_base_configuration_inner_hint,
is_valid_hint,
is_hint_not_resolvable,
)
from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
from dlt.common.configuration.specs.exceptions import NativeValueError
from dlt.common.configuration.specs.pluggable_run_context import PluggableRunContext
from dlt.common.configuration.container import Container
from dlt.common.configuration.utils import log_traces, deserialize_value
from dlt.common.configuration.exceptions import (
FieldLookupTraces,
LookupTrace,
ConfigFieldMissingException,
ConfigurationWrongTypeException,
LookupTraces,
NestedLookupTraces,
ValueNotSecretException,
InvalidNativeValue,
UnmatchedConfigHintResolversException,
)
# Type variable bound to `BaseConfiguration`; used below so that resolver functions are typed
# as returning the same configuration spec type they receive.
TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration)
def resolve_configuration(
    config: TConfiguration,
    *,
    sections: Tuple[str, ...] = (),
    explicit_value: Any = None,
    accept_partial: bool = False,
) -> TConfiguration:
    """Resolve `config` using the active providers and return it.

    When no `explicit_value` is passed and the configuration declares a `__section__`, that
    section name is first looked up as a single key. A non-mapping value found this way
    (a native representation) is then used as the explicit value.

    Args:
        config (TConfiguration): Configuration instance to resolve.
        sections (Tuple[str, ...], optional): Explicit sections to look in. Defaults to ().
        explicit_value (Any, optional): Initial value passed on to the resolver. Defaults to None.
        accept_partial (bool, optional): Passed on to the resolver, which then does not raise
            when fields are missing. Defaults to False.

    Raises:
        ConfigurationWrongTypeException: `config` is not a `BaseConfiguration` instance or its
            class has no `__configspec__` attribute.

    Returns:
        TConfiguration: The configuration returned by `_resolve_configuration`.
    """
    is_config_spec = isinstance(config, BaseConfiguration) and hasattr(
        config.__class__, "__configspec__"
    )
    if not is_config_spec:
        raise ConfigurationWrongTypeException(type(config))

    # try to get the native representation of the top level configuration using the config
    # section as a key. this allows, for example, to store a connection string or service.json
    # in its native form in a single env variable or under a single vault key.
    # it happens only when an explicit value for the configuration was not provided
    # TODO: we can move it into _resolve_configuration and also remove similar code in _resolve_config_field
    # TODO: also allow when explicit_value is dict so we can parse initial value and merge with it
    section_key = config.__section__
    if section_key and explicit_value is None:
        if isinstance(config, CredentialsConfiguration):
            native_hint = TSecretValue
        else:
            native_hint = AnyType
        native_value, lookup_traces = _resolve_single_value(
            section_key, native_hint, AnyType, None, sections, ()
        )
        # mappings cannot be used as explicit values: their fields are requested one by one
        if not (native_value is None or isinstance(native_value, C_Mapping)):
            explicit_value = native_value
            log_traces(None, section_key, type(config), native_value, None, lookup_traces)

    return _resolve_configuration(config, sections, (), explicit_value, accept_partial)
def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfiguration:
    """Create credentials of type `hint` from `initial_value`.

    `initial_value` is handed to `from_init_value` of the credentials spec, so it must be
    something that spec accepts (typically a native representation such as a string, or
    a dictionary of the credentials' fields).

    If `hint` is a union, each credentials spec in the union is tried in order and the first
    instance that reports itself as resolved is returned. If none resolves, the first instance
    that could be created is returned. If the last spec fails to parse and nothing was created
    before, its `NativeValueError` / `NotImplementedError` is re-raised.
    """
    if not is_union_type(hint):
        assert is_subclass(hint, CredentialsConfiguration)
        return hint.from_init_value(initial_value)  # type: ignore

    candidate_specs = get_all_types_of_class_in_union(hint, CredentialsConfiguration)
    assert len(candidate_specs) > 0
    last_position = len(candidate_specs) - 1
    fallback: CredentialsConfiguration = None
    for position, candidate_spec in enumerate(candidate_specs):
        try:
            instance = candidate_spec.from_init_value(initial_value)
            if instance.is_resolved():
                return instance
            # remember the first instance, it is returned when all specs match but none resolves
            fallback = fallback or instance
        except (NativeValueError, NotImplementedError):
            # fail only when no spec in the union could be instantiated at all
            if position == last_position and fallback is None:
                raise
    return fallback
def inject_section(
    section_context: ConfigSectionContext, merge_existing: bool = True, lock_context: bool = False
) -> ContextManager[ConfigSectionContext]:
    """Returns a context manager that injects `section_context` into the container

    The section context currently held by the container is always fetched first. With
    `merge_existing` set, it is merged into `section_context` by calling `section_context.merge`.

    Args:
        section_context (ConfigSectionContext): Section context to inject.
        merge_existing (bool, optional): Merge the context already in the container into
            `section_context` before injecting. Defaults to True.
        lock_context (bool, optional): Passed to `Container.injectable_context` as `lock_context`.
            Defaults to False.

    Returns:
        ContextManager[ConfigSectionContext]: Context manager created by `Container.injectable_context`
    """
    ctx_container = Container()
    # fetched unconditionally, exactly like the merge case needs it
    current_context = ctx_container[ConfigSectionContext]

    if merge_existing:
        section_context.merge(current_context)

    return ctx_container.injectable_context(section_context, lock_context=lock_context)
def _maybe_parse_native_value(
    config: TConfiguration, native_value: Any, embedded_sections: Tuple[str, ...]
) -> Dict[str, Any]:
    """Parses `native_value` via `config.parse_native_representation` and returns a dict of all
    fields that are different from defaults. Note that `config` will be modified.

    Args:
        config (TConfiguration): Configuration that parses the value; it is modified in place.
        native_value (Any): Value to parse. `None` and plain mappings are returned unchanged;
            instances of `BaseConfiguration` are parsed even though they are mappings.
        embedded_sections (Tuple[str, ...]): Sections reported in `InvalidNativeValue`.

    Raises:
        InvalidNativeValue: Parsing raised a `ValueError`. The original error is attached
            as the cause.

    Returns:
        Dict[str, Any]: Non-default fields obtained from the native value, or `native_value`
            itself if it was not parsed or the config does not implement native parsing.
    """
    is_explicit_instance = isinstance(native_value, BaseConfiguration)
    # if explicit value is a mapping it will be applied field by field later
    if native_value is not None and (
        not isinstance(native_value, C_Mapping) or is_explicit_instance
    ):
        try:
            try:
                # parse the native value anyway because there are configs with side effects
                config.parse_native_representation(native_value)
            except (ValueError, NotImplementedError):
                # allow native_values that are already config classes to skip parsing
                # note that we still try to do that - some like Incremental are able to initialize from those
                if not is_explicit_instance:
                    raise
            # parse native value and convert it into dict, extract the diff and use it as exact value
            # explicit_value may not be complete ie. may be a connection string without password
            # we want the resolve to still fill missing values
            native_value = dict(
                config.__class__.from_init_value(native_value).as_dict_nondefault()
            )
        except ValueError as v_err:
            # chain explicitly so the parsing error is reported as the direct cause
            raise InvalidNativeValue(
                type(config), type(native_value), embedded_sections, v_err
            ) from v_err
        except NotImplementedError:
            # config has no native representation: hand the value back untouched
            pass

    return native_value  # type: ignore[no-any-return]
def _resolve_configuration(
    config: TConfiguration,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    explicit_value: Any,
    accept_partial: bool,
) -> TConfiguration:
    """Resolves `config` in place and returns it.

    An already resolved configuration is returned immediately. Otherwise `explicit_value` is
    parsed as a native value and, if that did not resolve the configuration, the fields are
    resolved one by one. Any exception raised on the way is stored in `config.__exception__`.

    Args:
        config (TConfiguration): Configuration to resolve, modified in place.
        explicit_sections (Tuple[str, ...]): Sections passed on to field resolution.
        embedded_sections (Tuple[str, ...]): Sections passed on to native value parsing and
            field resolution.
        explicit_value (Any): Native value or mapping with explicit field values.
        accept_partial (bool): When True, missing fields do not raise.

    Raises:
        ConfigFieldMissingException: Fields are missing, the `on_partial` handlers did not
            resolve the configuration and `accept_partial` is False.

    Returns:
        TConfiguration: The same `config` instance.
    """
    # do not resolve twice
    if config.is_resolved():
        return config

    config.__exception__ = None
    try:
        try:
            explicit_value = _maybe_parse_native_value(config, explicit_value, embedded_sections)
            # if native representation didn't fully resolve the config, we try to resolve field by field
            if not config.is_resolved():
                _resolve_config_fields(
                    config, explicit_value, explicit_sections, embedded_sections, accept_partial
                )
                # full configuration was resolved
                config.resolve()
        except ConfigFieldMissingException as cm_ex:
            # store the ConfigFieldMissingException to have full info on traces of missing fields
            config.__exception__ = cm_ex
            # may resolve in partial handler
            config.call_method_in_mro("on_partial")
            # if resolved then do not raise
            if not config.is_resolved() and not accept_partial:
                raise
    except Exception as ex:
        # store the exception that happened in the resolution process
        config.__exception__ = ex
        raise

    return config
def _resolve_config_fields(
    config: BaseConfiguration,
    explicit_values: StrAny,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    accept_partial: bool,
) -> None:
    """Resolves every resolvable field of `config` and sets the resulting values on it.

    For each field returned by `config.get_resolvable_fields()` the value is taken from
    `explicit_values` or resolved via `_resolve_config_field`. Fields with union hints that
    contain several configuration specs are resolved with the first spec that resolves.
    Keys of fields whose values were actually resolved are collected in
    `config.__resolved_fields_set__`.

    Args:
        config (BaseConfiguration): Configuration whose fields are resolved, modified in place.
        explicit_values (StrAny): Mapping of field names to explicit values. An explicit `None`
            skips resolution of the field.
        explicit_sections (Tuple[str, ...]): Passed on to `_resolve_config_field`.
        embedded_sections (Tuple[str, ...]): Passed on to `_resolve_config_field` and stored in
            traces of explicit values.
        accept_partial (bool): Passed on to `_resolve_config_field`.

    Raises:
        UnmatchedConfigHintResolversException: `config.__hint_resolvers__` names fields that are
            not among the resolvable fields.
        ConfigFieldMissingException: At least one field is not resolved.
    """
    fields = config.get_resolvable_fields()
    unresolved_fields: FieldLookupTraces = {}
    config.__resolved_fields_set__ = []

    for key, hint in fields.items():
        if key in config.__hint_resolvers__:
            # Type hint for this field is created dynamically
            hint = config.__hint_resolvers__[key](config)
        # get default and explicit values
        default_value = getattr(config, key, None)
        explicit_none = False
        explicit_value = None
        current_value = None
        # traces collected for this field
        traces: NestedLookupTraces = []

        def _set_field(is_resolvable: bool = True) -> None:
            # NOTE: we hide B023 here because the function is called only within a loop
            # the closure reads the loop variables at call time, so it must be called only
            # after `current_value` and `traces` got their final values for this field
            # collect unresolved fields
            is_resolved = config.is_field_resolved(current_value, hint)  # noqa
            if not is_resolved:
                unresolved_fields[key] = traces  # noqa
            # set value in config
            setattr(config, key, current_value)  # noqa
            # store which values were actually resolved from config providers, includes explicit values
            if (
                is_resolved
                and is_resolvable
                and (
                    default_value != current_value  # noqa
                    or explicit_value is not None  # noqa
                    or explicit_none  # noqa
                )
            ):
                config.__resolved_fields_set__.append(key)  # noqa

        if explicit_values:
            if key in explicit_values:
                # allow None to be passed in explicit values
                # so we are able to reset defaults like in regular function calls
                explicit_value = explicit_values[key]
                explicit_none = explicit_value is None
                # detect dlt.config and dlt.secrets and force injection
                if isinstance(explicit_value, ConfigValueSentinel):
                    explicit_value = None

        if is_hint_not_resolvable(hint):
            # do not resolve not resolvable, but allow for explicit values to be passed
            if not explicit_none:
                current_value = default_value if explicit_value is None else explicit_value
            traces.append(
                LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, current_value)
            )
            _set_field(is_resolvable=False)
            continue

        # explicit none skips resolution
        if not explicit_none:
            # if hint is union of configurations, any of them must be resolved
            specs_in_union: List[Type[BaseConfiguration]] = []
            if is_union_type(hint):
                # if union contains a type of explicit value which is not a valid hint then return it
                # it could be ie. sqlalchemy Engine
                if (
                    explicit_value
                    and not is_valid_hint(type(explicit_value))
                    and get_all_types_of_class_in_union(
                        hint, type(explicit_value), with_superclass=True
                    )
                ):
                    current_value = explicit_value
                else:
                    # TODO: use default_value and explicit_value to filter the right specs from union, they constrain
                    # base configuration
                    # if is_base_configuration_inner_hint(type(default_value)) and is_base_configuration_inner_hint(type(explicit_value)):
                    #     if type(default_value) != type(explicit_value):
                    #         raise ConfigurationValueError()
                    specs_in_union = get_all_types_of_class_in_union(hint, BaseConfiguration)
                    if len(specs_in_union) == 1:
                        # a single spec in the union: resolve it like a regular (optional) config hint
                        is_optional = is_optional_type(hint)
                        hint = Optional[specs_in_union[0]] if is_optional else specs_in_union[0]  # type: ignore[assignment]

            if not current_value:
                if len(specs_in_union) > 1:
                    is_optional = is_optional_type(hint)
                    for idx, alt_spec in enumerate(specs_in_union):
                        # return first resolved config from an union
                        try:
                            current_value, _ = _resolve_config_field(
                                key,
                                alt_spec,
                                default_value,
                                explicit_value,
                                config,
                                config.__section__,
                                explicit_sections,
                                embedded_sections,
                                accept_partial,
                            )
                            break
                        except ConfigFieldMissingException as cfm_ex:
                            # add traces from unresolved union spec
                            traces.append(
                                LookupTraces(
                                    alt_spec.__name__,
                                    cfm_ex.config.__resolved_fields_set__,
                                    idx + 1,
                                    len(specs_in_union),
                                    cfm_ex.traces,
                                )
                            )
                        except InvalidNativeValue:
                            # if none of specs in union parsed
                            if idx == len(specs_in_union) - 1:
                                raise
                else:
                    try:
                        current_value, field_traces = _resolve_config_field(
                            key,
                            hint,
                            default_value,
                            explicit_value,
                            config,
                            config.__section__,
                            explicit_sections,
                            embedded_sections,
                            accept_partial,
                        )
                        traces.extend(field_traces)
                    except ConfigFieldMissingException as cfm_ex:
                        # if `hint` was a configuration it may not resolved
                        # collect exception traces
                        traces.append(
                            LookupTraces(
                                hint.__name__,
                                cfm_ex.config.__resolved_fields_set__,
                                0,
                                0,
                                cfm_ex.traces,
                            )
                        )
                        # keep default value
                        current_value = default_value
        else:
            # set the trace for explicit none
            traces = [LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, None)]

        _set_field()

    # Check for dynamic hint resolvers which have no corresponding fields
    unmatched_hint_resolvers: List[str] = []
    for field_name in config.__hint_resolvers__:
        if field_name not in fields:
            unmatched_hint_resolvers.append(field_name)

    if unmatched_hint_resolvers:
        raise UnmatchedConfigHintResolversException(type(config).__name__, unmatched_hint_resolvers)

    if unresolved_fields:
        raise ConfigFieldMissingException(config, unresolved_fields)
def _resolve_config_field(
    key: str,
    hint: Type[Any],
    default_value: Any,
    explicit_value: Any,
    config: BaseConfiguration,
    config_section: str,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    accept_partial: bool,
) -> Tuple[Any, List[LookupTrace]]:
    """Resolves the value of a single field `key` of `config`.

    A non-None `explicit_value` is used as the value, otherwise the key is looked up via
    `_resolve_single_value`. If the inner hint is a configuration, an embedded configuration
    instance is created (or taken from `default_value`) and resolved recursively. For any other
    non-context hint, a value that is not the explicit value itself is deserialized with
    `deserialize_value`.

    Args:
        key (str): Name of the field.
        hint (Type[Any]): Type hint of the field.
        default_value (Any): Current value of the field on `config`, returned when nothing
            is resolved.
        explicit_value (Any): Explicit value for the field or None.
        config (BaseConfiguration): Configuration that owns the field, used when logging traces.
        config_section (str): Section of the owning configuration.
        explicit_sections (Tuple[str, ...]): Sections passed on to lookups.
        embedded_sections (Tuple[str, ...]): Sections of the enclosing configurations.
        accept_partial (bool): Passed to the recursive resolution of embedded configurations.
            Optional hints force it to True.

    Returns:
        Tuple[Any, List[LookupTrace]]: The resolved value (`default_value` when the value is
            None) and the lookup traces collected for the field.
    """
    inner_hint = extract_inner_hint(hint, preserve_literal=True)

    if explicit_value is not None:
        value = explicit_value
        # TODO: consider logging explicit values, currently initial values taken from configuration
        # are passed as explicit values so that needs to be fixed first
        traces: List[LookupTrace] = [
            LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, value)
        ]
    else:
        # resolve key value via active providers passing the original hint ie. to preserve TSecretValue
        # NOTE: if inner_hint is an embedded config, it won't be resolved and value is None
        value, traces = _resolve_single_value(
            key, hint, inner_hint, config_section, explicit_sections, embedded_sections
        )
        log_traces(config, key, hint, value, default_value, traces)
    # contexts must be resolved as a whole
    if is_context_inner_hint(inner_hint):
        pass
    # if inner_hint is BaseConfiguration then resolve it recursively
    elif is_base_configuration_inner_hint(inner_hint):
        if isinstance(explicit_value, BaseConfiguration) and explicit_value.is_resolved():
            # explicit value was resolved so use it as it is
            pass
        else:
            if default_value is not None:
                # parse default value and use it as embedded config
                if not isinstance(default_value, BaseConfiguration):
                    embedded_config = inner_hint()
                    _maybe_parse_native_value(embedded_config, default_value, embedded_sections)
                else:
                    # if default value was instance of configuration, use it as embedded initial
                    # NOTE: we do not deep copy default value. dataclasses force factories or immutable objects
                    embedded_config = default_value
            else:
                embedded_config = inner_hint()
            # only config with sections may look for initial values
            # TODO: all this code can be moved into _resolve_configuration
            # TODO: also allow when explicit_value is dict so we can parse initial value and merge with it
            if embedded_config.__section__ and explicit_value is None:
                # config section becomes the key if the key does not start with, otherwise it keeps its original value
                initial_key, initial_embedded = _apply_embedded_sections_to_config_sections(
                    embedded_config.__section__, embedded_sections + (key,)
                )
                # it must be a secret value if config is credentials
                initial_hint = (
                    TSecretValue
                    if isinstance(embedded_config, CredentialsConfiguration)
                    else AnyType
                )
                initial_value, initial_traces = _resolve_single_value(
                    initial_key, initial_hint, AnyType, None, explicit_sections, initial_embedded
                )
                # mappings are not used as initial values, same as for top level configurations
                if initial_value is not None and not isinstance(initial_value, C_Mapping):
                    traces.extend(initial_traces)
                    log_traces(
                        config,
                        initial_key,
                        type(embedded_config),
                        initial_value,
                        default_value,
                        initial_traces,
                    )
                    explicit_value = initial_value
            # check if hint optional
            is_optional = is_optional_type(hint)
            # accept partial becomes True if type is optional so we do not fail on optional configs that do not resolve fully
            accept_partial = accept_partial or is_optional
            # create new instance and pass value from the provider as initial, add key to sections
            # propagate top level config section, any other sections should be replaced with keys
            top_level_section = () if embedded_sections or not config_section else (config_section,)
            value = _resolve_configuration(
                embedded_config,
                explicit_sections,
                embedded_sections + top_level_section + (key,),
                explicit_value,
                accept_partial,
            )
            if value.is_partial() and is_optional:
                # do not return partially resolved optional embeds
                # resetting the default as well makes the function return None for this field
                value = None
                default_value = None
    else:
        # if value is resolved, then deserialize and coerce it
        if value is not None:
            # do not deserialize explicit values
            if value is not explicit_value:
                value = deserialize_value(key, value, inner_hint)

    return default_value if value is None else value, traces
def _resolve_single_value(
    key: str,
    hint: Type[Any],
    inner_hint: Type[Any],
    config_section: str,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
) -> Tuple[Optional[Any], List[LookupTrace]]:
    """Looks up the value of `key` in the providers registered in the container.

    Context hints are answered by the context provider only and produce no traces. Configuration
    hints are never looked up directly, so None is returned for them. For all other hints the
    providers are queried in order: first with the pipeline name from the injected section
    context (if set), then without it.

    Returns:
        Tuple[Optional[Any], List[LookupTrace]]: The value found (or None) and the traces of
            all lookups that were made.
    """
    traces: List[LookupTrace] = []
    container = Container()
    # providers are held by the run context in the container
    providers_context = container[PluggableRunContext].providers

    if is_context_inner_hint(inner_hint):
        # contexts come from the context provider, nothing else is consulted
        context_value, _ = providers_context.context_provider.get_value(key, inner_hint, None)
        return context_value, traces

    if is_base_configuration_inner_hint(inner_hint):
        # configurations cannot be resolved directly
        return None, traces

    # we are resolving a regular field of the config
    config_section, embedded_sections = _apply_embedded_sections_to_config_sections(
        config_section, embedded_sections
    )
    providers = providers_context.providers
    # additional sections to look in are taken from the injected section context
    sections_context = container[ConfigSectionContext]

    def _query_providers(pipeline_name: str = None) -> Any:
        # go from the top provider, each provider starts with the most specific set of sections
        found: Any = None
        for provider in providers:
            # empty providers are not queried so they do not show up in the traces
            if not provider.is_empty:
                found, provider_traces = resolve_single_provider_value(
                    provider,
                    key,
                    hint,
                    pipeline_name,
                    config_section,
                    # explicit sections, when given, take precedence over the injected context
                    explicit_sections or sections_context.sections,
                    embedded_sections,
                )
                traces.extend(provider_traces)
                if found is not None:
                    # first provider with a value wins
                    break
        return found

    resolved = None
    # pipeline name, if present, is tried as a section first
    injected_pipeline_name = sections_context.pipeline_name
    if injected_pipeline_name:
        resolved = _query_providers(injected_pipeline_name)
    # then the lookup is repeated without it
    if resolved is None:
        resolved = _query_providers()

    return resolved, traces
def resolve_single_provider_value(
    provider: ConfigProvider,
    key: str,
    hint: Type[Any],
    pipeline_name: str = None,
    config_section: str = None,
    explicit_sections: Tuple[str, ...] = (),
    embedded_sections: Tuple[str, ...] = (),
) -> Tuple[Optional[Any], List[LookupTrace]]:
    """Looks up `key` in a single `provider`, going from the most to the least specific sections.

    For providers that support sections the lookup starts with explicit sections followed by
    embedded sections, with `config_section` (if set) always appended as the innermost one.
    After each miss the last optional section is dropped and the lookup is repeated until
    a value is found or the sections are exhausted. Providers without section support are
    queried once with no sections, and not at all when `pipeline_name` is set.

    Args:
        provider (ConfigProvider): Provider to query.
        key (str): Key to look up.
        hint (Type[Any]): Type hint of the value, passed to the provider.
        pipeline_name (str, optional): Pipeline name passed to the provider. Defaults to None.
        config_section (str, optional): Innermost section that is never dropped. Defaults to None.
        explicit_sections (Tuple[str, ...], optional): Outer sections. Defaults to ().
        embedded_sections (Tuple[str, ...], optional): Sections appended after the explicit
            ones. Defaults to ().

    Raises:
        ValueNotSecretException: A value for a secret hint was found in a provider that does
            not support secrets.

    Returns:
        Tuple[Optional[Any], List[LookupTrace]]: The value found (or None) and one trace per
            lookup attempt. Each trace holds its own copy of the sections that were queried.
    """
    traces: List[LookupTrace] = []

    if provider.supports_sections:
        ns = list(explicit_sections)
        # always extend with embedded sections
        ns.extend(embedded_sections)
    else:
        # if provider does not support sections and pipeline name is set then ignore it
        if pipeline_name:
            return None, traces
        # pass empty sections
        ns = []

    # a provider that cannot hold secrets must not return values for secret hints
    cant_hold_it: bool = not provider.supports_secrets and is_secret_hint(hint)

    value = None
    while True:
        # always snapshot the sections: `ns` is mutated below and the trace keeps a reference
        # to the list it was given, so sharing one list would rewrite earlier traces
        full_ns = list(ns)
        if config_section and provider.supports_sections:
            # config section, is always present and innermost
            full_ns.append(config_section)
        value, ns_key = provider.get_value(key, hint, pipeline_name, *full_ns)
        # if secret is obtained from non secret provider, we must fail
        if value is not None and cant_hold_it:
            raise ValueNotSecretException(provider.name, ns_key)

        # create trace, ignore providers that cant_hold_it
        if not cant_hold_it:
            traces.append(LookupTrace(provider.name, full_ns, ns_key, value))

        if value is not None:
            # value found, ignore further sections
            break
        if len(ns) == 0:
            # sections exhausted
            break
        # pop optional sections for less precise lookup
        ns.pop()

    # warn when the value equals one of the placeholder examples in TYPE_EXAMPLES
    if value in TYPE_EXAMPLES.values():
        _emit_placeholder_warning(value, key, ns_key, provider)

    return value, traces
def _emit_placeholder_warning(
    value: Any, key: str, full_key: str, provider: ConfigProvider
) -> None:
    """Logs a warning that a placeholder `value` was resolved for `key` from `provider`.

    The message names the provider and, when the provider reports any present locations,
    lists their absolute paths.
    """
    parts = [
        "Placeholder value encountered when resolving config or secret:\n",
        f"resolved_key: {key}, value:{value}, section: {full_key}\n",
        "Most likely, this comes from `init`-command, which creates basic templates for ",
        f"non-complex configs and secrets. The provider to adjust is {provider.name}",
    ]
    if bool(provider.present_locations):
        bullet_lines = [f"\t- {os.path.abspath(loc)}" for loc in provider.present_locations]
        parts.append(" at one of these locations:\n" + "\n".join(bullet_lines))
    logger.warning(msg="".join(parts))
def _apply_embedded_sections_to_config_sections(
config_section: str, embedded_sections: Tuple[str, ...]
) -> Tuple[str, Tuple[str, ...]]:
# for the configurations that have __section__ (config_section) defined and are embedded in other configurations,
# the innermost embedded section replaces config_section
if embedded_sections:
# do not add key to embedded sections if it starts with _, those sections must be ignored
if not embedded_sections[-1].startswith("_"):
config_section = embedded_sections[-1]
embedded_sections = embedded_sections[:-1]
# remove all embedded ns starting with _
return config_section, tuple(ns for ns in embedded_sections if not ns.startswith("_"))