# dlt/dlt/common/configuration/resolve.py

import itertools
from collections.abc import Mapping as C_Mapping
import os
from typing import Any, Dict, ContextManager, List, Optional, Sequence, Tuple, Type, TypeVar, Union

from dlt.common import logger
from dlt.common.configuration.providers.provider import (
    ConfigProvider,
    EXPLICIT_VALUES_PROVIDER_NAME,
)
from dlt.common.configuration.const import TYPE_EXAMPLES
from dlt.common.typing import (
    AnyType,
    ConfigValueSentinel,
    StrAny,
    TSecretValue,
    get_all_types_of_class_in_union,
    is_optional_type,
    is_subclass,
    is_union_type,
)

from dlt.common.configuration.specs.base_configuration import (
    BaseConfiguration,
    CredentialsConfiguration,
    is_secret_hint,
    extract_inner_hint,
    is_context_inner_hint,
    is_base_configuration_inner_hint,
    is_valid_hint,
    is_hint_not_resolvable,
)
from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
from dlt.common.configuration.specs.exceptions import NativeValueError
from dlt.common.configuration.specs.pluggable_run_context import PluggableRunContext
from dlt.common.configuration.container import Container
from dlt.common.configuration.utils import log_traces, deserialize_value
from dlt.common.configuration.exceptions import (
    FieldLookupTraces,
    LookupTrace,
    ConfigFieldMissingException,
    ConfigurationWrongTypeException,
    LookupTraces,
    NestedLookupTraces,
    ValueNotSecretException,
    InvalidNativeValue,
    UnmatchedConfigHintResolversException,
)

# Generic type variable for configuration specs. It is bound to `BaseConfiguration` so functions
# annotated with it (ie. `resolve_configuration`) return the same configuration type they receive.
TConfiguration = TypeVar("TConfiguration", bound=BaseConfiguration)


def resolve_configuration(
    config: TConfiguration,
    *,
    sections: Tuple[str, ...] = (),
    explicit_value: Any = None,
    accept_partial: bool = False,
) -> TConfiguration:
    """Resolves `config` and returns it.

    Before the regular resolution starts, a config that defines `__section__` and got no
    `explicit_value` is looked up as a whole, with the section name used as the key. This allows
    a native representation (ie. a connection string) to be stored under a single key. A value
    found that way becomes the explicit value, unless it is a mapping - mappings are resolved
    field by field.

    Args:
        config: Instance of a configuration spec to resolve.
        sections: Sections passed to the lookup as explicit sections.
        explicit_value: Initial value for the configuration, skips the native value lookup.
        accept_partial: Passed to `_resolve_configuration`.

    Raises:
        ConfigurationWrongTypeException: If `config` is not a `BaseConfiguration` instance or its
            class has no `__configspec__` attribute.
    """
    is_config_spec = isinstance(config, BaseConfiguration) and hasattr(
        config.__class__, "__configspec__"
    )
    if not is_config_spec:
        raise ConfigurationWrongTypeException(type(config))

    # TODO: we can move it into _resolve_configuration and also remove similar code in _resolve_config_field
    # TODO: also allow when explicit_value is dict so we can parse initial value and merge with it
    if explicit_value is None and config.__section__:
        # credentials may only come from providers that are allowed to hold secrets
        if isinstance(config, CredentialsConfiguration):
            native_hint = TSecretValue
        else:
            native_hint = AnyType
        native_value, native_traces = _resolve_single_value(
            config.__section__, native_hint, AnyType, None, sections, ()
        )
        # a mapping is not a native representation: its fields will be requested one by one
        if not (native_value is None or isinstance(native_value, C_Mapping)):
            explicit_value = native_value
            log_traces(None, config.__section__, type(config), native_value, None, native_traces)

    return _resolve_configuration(config, sections, (), explicit_value, accept_partial)


def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfiguration:
    """Creates credentials of type `hint` from `initial_value` via `from_init_value`.

    When `hint` is a union, every credentials spec found in it is tried in order. The first
    instance that reports itself as resolved is returned. If none is resolved, the first instance
    that could be created at all is returned. Parsing errors (`NativeValueError`,
    `NotImplementedError`) are ignored, except when the last spec fails and no other spec
    produced an instance - then that error is re-raised.
    """
    if not is_union_type(hint):
        assert is_subclass(hint, CredentialsConfiguration)
        return hint.from_init_value(initial_value)  # type: ignore

    candidate_specs = get_all_types_of_class_in_union(hint, CredentialsConfiguration)
    assert len(candidate_specs) > 0
    last_position = len(candidate_specs) - 1
    # first instance created from the initial value, returned if nothing resolves fully
    fallback: CredentialsConfiguration = None
    for position, candidate in enumerate(candidate_specs):
        try:
            instance = candidate.from_init_value(initial_value)
            if instance.is_resolved():
                return instance
            fallback = fallback or instance
        except (NativeValueError, NotImplementedError):
            # re-raise only if this was the last spec and no spec accepted the value
            if position == last_position and fallback is None:
                raise
    return fallback


def inject_section(
    section_context: ConfigSectionContext, merge_existing: bool = True, lock_context: bool = False
) -> ContextManager[ConfigSectionContext]:
    """Returns a context manager that injects `section_context` into the container.

    Args:
        section_context (ConfigSectionContext): Section context to inject.
        merge_existing (bool, optional): If True, `section_context.merge` is called with the
            context currently held by the container before the injection. Note that the merge
            is called on the passed instance. Defaults to True.
        lock_context (bool, optional): Forwarded as `lock_context` to
            `Container.injectable_context`. Defaults to False.

    Returns:
        ContextManager[ConfigSectionContext]: Context manager created by
            `Container.injectable_context` for `section_context`
    """
    container = Container()
    # the current context is always fetched from the container, also when it is not merged
    current_context = container[ConfigSectionContext]
    if merge_existing:
        section_context.merge(current_context)
    return container.injectable_context(section_context, lock_context=lock_context)


def _maybe_parse_native_value(
    config: TConfiguration, native_value: Any, embedded_sections: Tuple[str, ...]
) -> Dict[str, Any]:
    """Converts a native representation into a dict of explicit field values.

    `None` and plain mappings are returned unchanged. Any other value, including instances of
    `BaseConfiguration`, is first passed to `config.parse_native_representation` (so `config`
    is modified) and then converted with `from_init_value(...).as_dict_nondefault()` into
    a dict that is returned.

    Raises:
        InvalidNativeValue: When parsing or conversion raises `ValueError`.

    A `NotImplementedError` raised on the way is swallowed and `native_value` is returned as
    it was passed.
    """
    if native_value is None:
        return native_value  # type: ignore[no-any-return]

    passed_as_config = isinstance(native_value, BaseConfiguration)
    # a mapping that is not a configuration will be applied field by field later
    if isinstance(native_value, C_Mapping) and not passed_as_config:
        return native_value  # type: ignore[return-value]

    try:
        try:
            # always attempt to parse: there are configs with side effects
            config.parse_native_representation(native_value)
        except (ValueError, NotImplementedError):
            # values that are already config instances may skip parsing, still some configs
            # (like Incremental) are able to initialize from those
            if not passed_as_config:
                raise
        # the native value may be incomplete (ie. connection string without password), so only
        # the non-default fields are taken and the resolver fills the missing ones
        from_native = config.__class__.from_init_value(native_value)
        native_value = dict(from_native.as_dict_nondefault().items())
    except ValueError as v_err:
        raise InvalidNativeValue(type(config), type(native_value), embedded_sections, v_err)
    except NotImplementedError:
        pass

    return native_value  # type: ignore[no-any-return]


def _resolve_configuration(
    config: TConfiguration,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    explicit_value: Any,
    accept_partial: bool,
) -> TConfiguration:
    """Resolves `config` in place and returns it, configs already resolved are returned as they are.

    `explicit_value` is parsed as a native value first. If that does not resolve the config, the
    fields are resolved one by one. When fields are missing, the `on_partial` handlers are called
    and the `ConfigFieldMissingException` is re-raised unless the handlers resolved the config or
    `accept_partial` is set. Any exception seen during the resolution is kept in
    `config.__exception__`.
    """
    if not config.is_resolved():
        config.__exception__ = None
        try:
            try:
                explicit_value = _maybe_parse_native_value(
                    config, explicit_value, embedded_sections
                )
                # go field by field if the native representation was not enough
                if not config.is_resolved():
                    _resolve_config_fields(
                        config, explicit_value, explicit_sections, embedded_sections, accept_partial
                    )
                # all fields are in place
                config.resolve()
            except ConfigFieldMissingException as missing_ex:
                # keep the exception, it holds traces of all missing fields
                config.__exception__ = missing_ex
                # partial handlers get a chance to resolve the config
                config.call_method_in_mro("on_partial")
                if not (config.is_resolved() or accept_partial):
                    raise
        except Exception as resolve_ex:
            # remember what failed during the resolution
            config.__exception__ = resolve_ex
            raise

    return config


def _resolve_config_fields(
    config: BaseConfiguration,
    explicit_values: StrAny,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    accept_partial: bool,
) -> None:
    """Resolves all resolvable fields of `config` in place.

    For each field returned by `config.get_resolvable_fields()` a value is obtained from
    `explicit_values` or via `_resolve_config_field` and written back to `config` with `setattr`.
    Keys of fields resolved to an explicit or non-default value are collected in
    `config.__resolved_fields_set__`. Lookup traces of fields that `config.is_field_resolved`
    reports as unresolved are collected and raised together at the end.

    Args:
        config: Configuration to populate, modified in place.
        explicit_values: Explicit values by field name, may be empty or None. A key that is
            present with `None` value skips the resolution and sets the field to `None`.
            A `ConfigValueSentinel` value is handled as if no explicit value was passed.
        explicit_sections: Passed through to `_resolve_config_field`.
        embedded_sections: Passed through to `_resolve_config_field`, also stored in traces.
        accept_partial: Passed through to `_resolve_config_field`.

    Raises:
        UnmatchedConfigHintResolversException: If `config.__hint_resolvers__` contains names
            that are not among resolvable fields. Checked before the missing fields.
        ConfigFieldMissingException: If any field was not resolved.
    """
    fields = config.get_resolvable_fields()
    unresolved_fields: FieldLookupTraces = {}
    config.__resolved_fields_set__ = []

    for key, hint in fields.items():
        if key in config.__hint_resolvers__:
            # Type hint for this field is created dynamically
            hint = config.__hint_resolvers__[key](config)
        # get default and explicit values
        default_value = getattr(config, key, None)
        explicit_none = False
        explicit_value = None
        current_value = None
        # traces collected for this field
        traces: NestedLookupTraces = []

        def _set_field(is_resolvable: bool = True) -> None:
            # NOTE: we hide B023 here because the function is called only within a loop
            # the closure reads the loop variables at call time, so it must be called in the same
            # iteration in which it was defined
            # collect unresolved fields
            is_resolved = config.is_field_resolved(current_value, hint)  # noqa
            if not is_resolved:
                unresolved_fields[key] = traces  # noqa
            # set value in config
            setattr(config, key, current_value)  # noqa
            # store which values were actually resolved from config providers, includes explicit values
            if (
                is_resolved
                and is_resolvable
                and (
                    default_value != current_value  # noqa
                    or explicit_value is not None  # noqa
                    or explicit_none  # noqa
                )
            ):
                config.__resolved_fields_set__.append(key)  # noqa

        if explicit_values:
            if key in explicit_values:
                # allow None to be passed in explicit values
                # so we are able to reset defaults like in regular function calls
                explicit_value = explicit_values[key]
                # computed before the sentinel check below, so a sentinel is not an explicit None
                explicit_none = explicit_value is None
                # detect dlt.config and dlt.secrets and force injection
                if isinstance(explicit_value, ConfigValueSentinel):
                    explicit_value = None

        if is_hint_not_resolvable(hint):
            # do not resolve not resolvable, but allow for explicit values to be passed
            if not explicit_none:
                current_value = default_value if explicit_value is None else explicit_value
            traces.append(
                LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, current_value)
            )
            _set_field(is_resolvable=False)
            continue

        # explicit none skips resolution
        if not explicit_none:
            # if hint is union of configurations, any of them must be resolved
            specs_in_union: List[Type[BaseConfiguration]] = []
            if is_union_type(hint):
                # if union contains a type of explicit value which is not a valid hint then return it
                # it could be ie. sqlalchemy Engine
                if (
                    explicit_value
                    and not is_valid_hint(type(explicit_value))
                    and get_all_types_of_class_in_union(
                        hint, type(explicit_value), with_superclass=True
                    )
                ):
                    current_value = explicit_value
                else:
                    # TODO: use default_value and explicit_value to filter the right specs from union, they constrain
                    #   base configuration
                    # if is_base_configuration_inner_hint(type(default_value)) and is_base_configuration_inner_hint(type(explicit_value)):
                    #     if type(default_value) != type(explicit_value):
                    #         raise ConfigurationValueError()
                    specs_in_union = get_all_types_of_class_in_union(hint, BaseConfiguration)
                    # a union with a single configuration is reduced to that configuration,
                    # optionality of the original hint is preserved
                    if len(specs_in_union) == 1:
                        is_optional = is_optional_type(hint)
                        hint = Optional[specs_in_union[0]] if is_optional else specs_in_union[0]  # type: ignore[assignment]
            # at this point `current_value` is set only if the explicit value was accepted as it is
            # by the union branch above
            if not current_value:
                if len(specs_in_union) > 1:
                    # NOTE(review): `is_optional` is assigned here but not read in this branch
                    is_optional = is_optional_type(hint)
                    for idx, alt_spec in enumerate(specs_in_union):
                        # return first resolved config from an union
                        try:
                            current_value, _ = _resolve_config_field(
                                key,
                                alt_spec,
                                default_value,
                                explicit_value,
                                config,
                                config.__section__,
                                explicit_sections,
                                embedded_sections,
                                accept_partial,
                            )
                            break
                        except ConfigFieldMissingException as cfm_ex:
                            # add traces from unresolved union spec
                            traces.append(
                                LookupTraces(
                                    alt_spec.__name__,
                                    cfm_ex.config.__resolved_fields_set__,
                                    idx + 1,
                                    len(specs_in_union),
                                    cfm_ex.traces,
                                )
                            )
                        except InvalidNativeValue:
                            # if none of specs in union parsed
                            if idx == len(specs_in_union) - 1:
                                raise
                else:
                    try:
                        current_value, field_traces = _resolve_config_field(
                            key,
                            hint,
                            default_value,
                            explicit_value,
                            config,
                            config.__section__,
                            explicit_sections,
                            embedded_sections,
                            accept_partial,
                        )
                        traces.extend(field_traces)
                    except ConfigFieldMissingException as cfm_ex:
                        # if `hint` was a configuration it may not resolved
                        # collect exception traces
                        traces.append(
                            LookupTraces(
                                hint.__name__,
                                cfm_ex.config.__resolved_fields_set__,
                                0,
                                0,
                                cfm_ex.traces,
                            )
                        )
                        # keep default value
                        current_value = default_value
        else:
            # set the trace for explicit none
            traces = [LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, None)]

        _set_field()

    # Check for dynamic hint resolvers which have no corresponding fields
    unmatched_hint_resolvers: List[str] = []
    for field_name in config.__hint_resolvers__:
        if field_name not in fields:
            unmatched_hint_resolvers.append(field_name)

    if unmatched_hint_resolvers:
        raise UnmatchedConfigHintResolversException(type(config).__name__, unmatched_hint_resolvers)

    if unresolved_fields:
        raise ConfigFieldMissingException(config, unresolved_fields)


def _resolve_config_field(
    key: str,
    hint: Type[Any],
    default_value: Any,
    explicit_value: Any,
    config: BaseConfiguration,
    config_section: str,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
    accept_partial: bool,
) -> Tuple[Any, List[LookupTrace]]:
    """Resolves a single field `key` of `config` and returns its value with lookup traces.

    An explicit value is used as it is, otherwise the value is looked up in the providers. What
    happens next depends on the inner hint of the field:
    * context hints: the value is returned without further processing
    * configuration hints: the embedded configuration is resolved recursively
    * any other hint: a value coming from providers is deserialized to the inner hint

    `default_value` is returned in place of the value if the value is `None`.
    """
    inner_hint = extract_inner_hint(hint, preserve_literal=True)
    traces: List[LookupTrace]
    if explicit_value is None:
        # query active providers with the original hint ie. to preserve TSecretValue
        # NOTE: if inner_hint is an embedded config, it won't be resolved and value is None
        value, traces = _resolve_single_value(
            key, hint, inner_hint, config_section, explicit_sections, embedded_sections
        )
        log_traces(config, key, hint, value, default_value, traces)
    else:
        # TODO: consider logging explicit values, currently initial values taken from configuration
        #  are passed as explicit values so that needs to be fixed first
        value = explicit_value
        traces = [
            LookupTrace(EXPLICIT_VALUES_PROVIDER_NAME, embedded_sections, key, explicit_value)
        ]

    if is_context_inner_hint(inner_hint):
        # contexts are resolved as a whole, nothing more to do here
        pass
    elif is_base_configuration_inner_hint(inner_hint):
        # embedded configurations are resolved recursively, unless a resolved instance was passed
        # explicitly - such instance is used as it is
        explicit_is_resolved = (
            isinstance(explicit_value, BaseConfiguration) and explicit_value.is_resolved()
        )
        if not explicit_is_resolved:
            if isinstance(default_value, BaseConfiguration):
                # default instance becomes the embedded initial config
                # NOTE: we do not deep copy default value. dataclasses force factories or immutable objects
                embedded_config = default_value
            else:
                embedded_config = inner_hint()
                if default_value is not None:
                    # any other default is parsed into a fresh instance
                    _maybe_parse_native_value(embedded_config, default_value, embedded_sections)

            # only configs with sections may look for initial values
            # TODO: all this code can be moved into _resolve_configuration
            # TODO: also allow when explicit_value is dict so we can parse initial value and merge with it
            if embedded_config.__section__ and explicit_value is None:
                # the key of the initial value and sections to look in are computed from the
                # section of the embedded config and embedded sections extended with field key
                initial_key, initial_embedded = _apply_embedded_sections_to_config_sections(
                    embedded_config.__section__, embedded_sections + (key,)
                )
                # credentials must be stored as secret values
                initial_hint: Any
                if isinstance(embedded_config, CredentialsConfiguration):
                    initial_hint = TSecretValue
                else:
                    initial_hint = AnyType
                initial_value, initial_traces = _resolve_single_value(
                    initial_key, initial_hint, AnyType, None, explicit_sections, initial_embedded
                )
                if not (initial_value is None or isinstance(initial_value, C_Mapping)):
                    traces.extend(initial_traces)
                    log_traces(
                        config,
                        initial_key,
                        type(embedded_config),
                        initial_value,
                        default_value,
                        initial_traces,
                    )
                    explicit_value = initial_value

            is_optional = is_optional_type(hint)
            # optional configs may stay partial, so do not fail when they do not resolve fully
            accept_partial = accept_partial or is_optional
            # propagate top level config section, any other sections should be replaced with keys
            if config_section and not embedded_sections:
                top_level_section: Tuple[str, ...] = (config_section,)
            else:
                top_level_section = ()

            # pass the value from the provider as initial and add key to the sections
            value = _resolve_configuration(
                embedded_config,
                explicit_sections,
                embedded_sections + top_level_section + (key,),
                explicit_value,
                accept_partial,
            )
            if value.is_partial() and is_optional:
                # partially resolved optional embeds are dropped
                value = None
                default_value = None
    elif value is not None and value is not explicit_value:
        # deserialize and coerce values found in providers, explicit values are kept as they are
        value = deserialize_value(key, value, inner_hint)

    if value is None:
        return default_value, traces
    return value, traces


def _resolve_single_value(
    key: str,
    hint: Type[Any],
    inner_hint: Type[Any],
    config_section: str,
    explicit_sections: Tuple[str, ...],
    embedded_sections: Tuple[str, ...],
) -> Tuple[Optional[Any], List[LookupTrace]]:
    """Looks for the value of `key` in the providers found in the container.

    Context hints are resolved only with the context provider and produce no traces. Configuration
    hints are not looked up at all and `None` is returned. For all other hints the providers are
    queried in order and the first value that is not `None` wins. If the injected section context
    has a pipeline name, a full round with that name is done before the round without it.

    Returns:
        Tuple of the value found (or `None`) and traces of all lookups that were done.
    """
    traces: List[LookupTrace] = []
    value = None

    container = Container()
    providers_context = container[PluggableRunContext].providers
    if is_context_inner_hint(inner_hint):
        # contexts come from the context provider, no other provider is asked
        value, _ = providers_context.context_provider.get_value(key, inner_hint, None)
        return value, traces
    if is_base_configuration_inner_hint(inner_hint):
        # configurations cannot be resolved directly
        return value, traces

    # we are resolving a field of the config
    config_section, embedded_sections = _apply_embedded_sections_to_config_sections(
        config_section, embedded_sections
    )
    providers = providers_context.providers
    # container holds additional sections to look in
    sections_context = container[ConfigSectionContext]

    # lookup with pipeline name (if present) goes first, then the lookup without it
    pipeline_names: List[Optional[str]] = [None]
    if sections_context.pipeline_name:
        pipeline_names.insert(0, sections_context.pipeline_name)

    for pipeline_name in pipeline_names:
        # providers are ordered: top provider with most specific set of sections goes first
        for provider in providers:
            # empty providers are not queried so they do not show up in the traces
            if provider.is_empty:
                continue

            value, provider_traces = resolve_single_provider_value(
                provider,
                key,
                hint,
                pipeline_name,
                config_section,
                # explicit sections take precedence over the injected context
                explicit_sections or sections_context.sections,
                embedded_sections,
            )
            traces.extend(provider_traces)
            if value is not None:
                # found: remaining providers and lookups are skipped
                return value, traces

    return value, traces


def resolve_single_provider_value(
    provider: ConfigProvider,
    key: str,
    hint: Type[Any],
    pipeline_name: str = None,
    config_section: str = None,
    explicit_sections: Tuple[str, ...] = (),
    embedded_sections: Tuple[str, ...] = (),
) -> Tuple[Optional[Any], List[LookupTrace]]:
    """Looks up `key` in a single `provider`, going from the most to the least specific sections.

    The lookup starts with `explicit_sections` followed by `embedded_sections`. After each miss
    the last section is dropped and the lookup is repeated, until a value is found or there are no
    more sections to drop. `config_section`, if set, is appended as the innermost section in every
    attempt. Providers that do not support sections are queried once without any sections and are
    skipped altogether when `pipeline_name` is set.

    A warning is logged if the value found equals one of the `TYPE_EXAMPLES` placeholders.

    Args:
        provider: Provider to query.
        key: Name of the value to look for.
        hint: Type hint of the value, passed to the provider and used to detect secrets.
        pipeline_name: Optional pipeline name passed to the provider.
        config_section: Optional section of the configuration, always innermost.
        explicit_sections: Outermost sections of the lookup.
        embedded_sections: Sections that follow the explicit sections.

    Returns:
        Tuple of the value found (or `None`) and a list of traces, one per each lookup attempt.
        Every trace holds its own list with sections that were used in that attempt.

    Raises:
        ValueNotSecretException: If a value for a secret hint was found in a provider that does
            not support secrets.
    """
    traces: List[LookupTrace] = []

    if provider.supports_sections:
        ns = list(explicit_sections)
        # always extend with embedded sections
        ns.extend(embedded_sections)
    else:
        # if provider does not support sections and pipeline name is set then ignore it
        if pipeline_name:
            return None, traces
        else:
            # pass empty sections
            ns = []

    value = None
    while True:
        # always take a snapshot of `ns`: the list is stored in the trace below while `ns` is
        # shortened in place after each miss. sharing the list would change sections in the
        # traces that were already recorded
        full_ns = ns.copy()
        if config_section and provider.supports_sections:
            # config section, is always present and innermost
            full_ns.append(config_section)
        value, ns_key = provider.get_value(key, hint, pipeline_name, *full_ns)
        # if secret is obtained from non secret provider, we must fail
        cant_hold_it: bool = not provider.supports_secrets and is_secret_hint(hint)
        if value is not None and cant_hold_it:
            raise ValueNotSecretException(provider.name, ns_key)

        # create trace, ignore providers that cant_hold_it
        if not cant_hold_it:
            traces.append(LookupTrace(provider.name, full_ns, ns_key, value))

        if value is not None:
            # value found, ignore further sections
            break
        if len(ns) == 0:
            # sections exhausted
            break
        # pop optional sections for less precise lookup
        ns.pop()

    if value in TYPE_EXAMPLES.values():
        _emit_placeholder_warning(value, key, ns_key, provider)
    return value, traces


def _emit_placeholder_warning(
    value: Any, key: str, full_key: str, provider: ConfigProvider
) -> None:
    """Logs a warning that `value` resolved for `key` is a placeholder and not a real value.

    The message names the provider and, if the provider reports any present locations, lists
    their absolute paths.
    """
    message_parts = [
        "Placeholder value encountered when resolving config or secret:\n",
        f"resolved_key: {key}, value:{value}, section: {full_key}\n",
        "Most likely, this comes from `init`-command, which creates basic templates for ",
        f"non-complex configs and secrets. The provider to adjust is {provider.name}",
    ]
    present_locations = provider.present_locations
    if present_locations:
        listed = "\n".join(f"\t- {os.path.abspath(loc)}" for loc in present_locations)
        message_parts.append(f" at one of these locations:\n{listed}")
    logger.warning(msg="".join(message_parts))


def _apply_embedded_sections_to_config_sections(
    config_section: str, embedded_sections: Tuple[str, ...]
) -> Tuple[str, Tuple[str, ...]]:
    # for the configurations that have __section__ (config_section) defined and are embedded in other configurations,
    # the innermost embedded section replaces config_section
    if embedded_sections:
        # do not add key to embedded sections if it starts with _, those sections must be ignored
        if not embedded_sections[-1].startswith("_"):
            config_section = embedded_sections[-1]
        embedded_sections = embedded_sections[:-1]

    # remove all embedded ns starting with _
    return config_section, tuple(ns for ns in embedded_sections if not ns.startswith("_"))