mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-17 19:31:34 +00:00
476 lines
17 KiB
Python
476 lines
17 KiB
Python
import itertools
|
|
import os
|
|
from copy import deepcopy
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import (
|
|
Any,
|
|
Dict,
|
|
Iterable,
|
|
Iterator,
|
|
Mapping,
|
|
MutableSet,
|
|
Optional,
|
|
Tuple,
|
|
Type,
|
|
)
|
|
|
|
from dbt import tracking
|
|
from dbt.adapters.contracts.connection import (
|
|
AdapterRequiredConfig,
|
|
Credentials,
|
|
HasCredentials,
|
|
)
|
|
from dbt.adapters.contracts.relation import ComponentName
|
|
from dbt.adapters.factory import get_include_paths, get_relation_class_by_name
|
|
from dbt.config.project import load_raw_project
|
|
from dbt.contracts.graph.manifest import ManifestMetadata
|
|
from dbt.contracts.project import Configuration
|
|
from dbt.events.types import UnusedResourceConfigPath
|
|
from dbt.exceptions import (
|
|
ConfigContractBrokenError,
|
|
DbtProjectError,
|
|
DbtRuntimeError,
|
|
NonUniquePackageNameError,
|
|
UninstalledPackagesFoundError,
|
|
)
|
|
from dbt.flags import get_flags
|
|
from dbt_common.dataclass_schema import ValidationError
|
|
from dbt_common.events.functions import warn_or_error
|
|
from dbt_common.helper_types import DictDefaultEmptyStr, FQNPath, PathSet
|
|
|
|
from .profile import Profile
|
|
from .project import Project
|
|
from .renderer import DbtProjectYamlRenderer, ProfileRenderer
|
|
|
|
|
|
# Called by RuntimeConfig.collect_parts class method
|
|
def load_project(
|
|
project_root: str,
|
|
version_check: bool,
|
|
profile: HasCredentials,
|
|
cli_vars: Optional[Dict[str, Any]] = None,
|
|
) -> Project:
|
|
# get the project with all of the provided information
|
|
project_renderer = DbtProjectYamlRenderer(profile, cli_vars)
|
|
project = Project.from_project_root(
|
|
project_root, project_renderer, verify_version=version_check
|
|
)
|
|
|
|
# Save env_vars encountered in rendering for partial parsing
|
|
project.project_env_vars = project_renderer.ctx_obj.env_vars
|
|
return project
|
|
|
|
|
|
def load_profile(
|
|
project_root: str,
|
|
cli_vars: Dict[str, Any],
|
|
profile_name_override: Optional[str] = None,
|
|
target_override: Optional[str] = None,
|
|
threads_override: Optional[int] = None,
|
|
) -> Profile:
|
|
raw_project = load_raw_project(project_root)
|
|
raw_profile_name = raw_project.get("profile")
|
|
profile_renderer = ProfileRenderer(cli_vars)
|
|
profile_name = profile_renderer.render_value(raw_profile_name)
|
|
profile = Profile.render(
|
|
profile_renderer, profile_name, profile_name_override, target_override, threads_override
|
|
)
|
|
# Save env_vars encountered in rendering for partial parsing
|
|
profile.profile_env_vars = profile_renderer.ctx_obj.env_vars
|
|
return profile
|
|
|
|
|
|
def _project_quoting_dict(proj: Project, profile: Profile) -> Dict[ComponentName, bool]:
|
|
src: Dict[str, Any] = profile.credentials.translate_aliases(proj.quoting)
|
|
result: Dict[ComponentName, bool] = {}
|
|
for key in ComponentName:
|
|
if key in src:
|
|
value = src[key]
|
|
if isinstance(value, bool):
|
|
result[key] = value
|
|
return result
|
|
|
|
|
|
@dataclass
|
|
class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
|
|
args: Any
|
|
profile_name: str
|
|
cli_vars: Dict[str, Any]
|
|
dependencies: Optional[Mapping[str, "RuntimeConfig"]] = None
|
|
|
|
def __post_init__(self):
|
|
self.validate()
|
|
|
|
@classmethod
|
|
def get_profile(
|
|
cls,
|
|
project_root: str,
|
|
cli_vars: Dict[str, Any],
|
|
args: Any,
|
|
) -> Profile:
|
|
return load_profile(
|
|
project_root,
|
|
cli_vars,
|
|
args.profile,
|
|
args.target,
|
|
args.threads,
|
|
)
|
|
|
|
# Called by 'new_project' and 'from_args'
|
|
@classmethod
|
|
def from_parts(
|
|
cls,
|
|
project: Project,
|
|
profile: Profile,
|
|
args: Any,
|
|
dependencies: Optional[Mapping[str, "RuntimeConfig"]] = None,
|
|
) -> "RuntimeConfig":
|
|
"""Instantiate a RuntimeConfig from its components.
|
|
|
|
:param profile: A parsed dbt Profile.
|
|
:param project: A parsed dbt Project.
|
|
:param args: The parsed command-line arguments.
|
|
:returns RuntimeConfig: The new configuration.
|
|
"""
|
|
quoting: Dict[str, Any] = (
|
|
get_relation_class_by_name(profile.credentials.type)
|
|
.get_default_quote_policy()
|
|
.replace_dict(_project_quoting_dict(project, profile))
|
|
).to_dict(omit_none=True)
|
|
|
|
cli_vars: Dict[str, Any] = getattr(args, "vars", {})
|
|
log_cache_events: bool = getattr(args, "log_cache_events", profile.log_cache_events)
|
|
|
|
return cls(
|
|
project_name=project.project_name,
|
|
version=project.version,
|
|
project_root=project.project_root,
|
|
model_paths=project.model_paths,
|
|
macro_paths=project.macro_paths,
|
|
seed_paths=project.seed_paths,
|
|
test_paths=project.test_paths,
|
|
analysis_paths=project.analysis_paths,
|
|
docs_paths=project.docs_paths,
|
|
asset_paths=project.asset_paths,
|
|
target_path=project.target_path,
|
|
snapshot_paths=project.snapshot_paths,
|
|
clean_targets=project.clean_targets,
|
|
log_path=project.log_path,
|
|
packages_install_path=project.packages_install_path,
|
|
packages_specified_path=project.packages_specified_path,
|
|
quoting=quoting,
|
|
models=project.models,
|
|
on_run_start=project.on_run_start,
|
|
on_run_end=project.on_run_end,
|
|
dispatch=project.dispatch,
|
|
seeds=project.seeds,
|
|
snapshots=project.snapshots,
|
|
dbt_version=project.dbt_version,
|
|
packages=project.packages,
|
|
manifest_selectors=project.manifest_selectors,
|
|
selectors=project.selectors,
|
|
query_comment=project.query_comment,
|
|
sources=project.sources,
|
|
data_tests=project.data_tests,
|
|
unit_tests=project.unit_tests,
|
|
metrics=project.metrics,
|
|
semantic_models=project.semantic_models,
|
|
saved_queries=project.saved_queries,
|
|
exposures=project.exposures,
|
|
vars=project.vars,
|
|
config_version=project.config_version,
|
|
unrendered=project.unrendered,
|
|
project_env_vars=project.project_env_vars,
|
|
restrict_access=project.restrict_access,
|
|
profile_env_vars=profile.profile_env_vars,
|
|
profile_name=profile.profile_name,
|
|
target_name=profile.target_name,
|
|
threads=profile.threads,
|
|
credentials=profile.credentials,
|
|
args=args,
|
|
cli_vars=cli_vars,
|
|
log_cache_events=log_cache_events,
|
|
dependencies=dependencies,
|
|
dbt_cloud=project.dbt_cloud,
|
|
)
|
|
|
|
# Called by 'load_projects' in this class
|
|
def new_project(self, project_root: str) -> "RuntimeConfig":
|
|
"""Given a new project root, read in its project dictionary, supply the
|
|
existing project's profile info, and create a new project file.
|
|
|
|
:param project_root: A filepath to a dbt project.
|
|
:raises DbtProfileError: If the profile is invalid.
|
|
:raises DbtProjectError: If project is missing or invalid.
|
|
:returns: The new configuration.
|
|
"""
|
|
# copy profile
|
|
profile = Profile(**self.to_profile_info())
|
|
profile.validate()
|
|
|
|
# load the new project and its packages. Don't pass cli variables.
|
|
renderer = DbtProjectYamlRenderer(profile)
|
|
project = Project.from_project_root(
|
|
project_root,
|
|
renderer,
|
|
verify_version=bool(getattr(self.args, "VERSION_CHECK", True)),
|
|
)
|
|
|
|
runtime_config = self.from_parts(
|
|
project=project,
|
|
profile=profile,
|
|
args=deepcopy(self.args),
|
|
)
|
|
# force our quoting back onto the new project.
|
|
runtime_config.quoting = deepcopy(self.quoting)
|
|
return runtime_config
|
|
|
|
def serialize(self) -> Dict[str, Any]:
|
|
"""Serialize the full configuration to a single dictionary. For any
|
|
instance that has passed validate() (which happens in __init__), it
|
|
matches the Configuration contract.
|
|
|
|
Note that args are not serialized.
|
|
|
|
:returns dict: The serialized configuration.
|
|
"""
|
|
result = self.to_project_config(with_packages=True)
|
|
result.update(self.to_profile_info(serialize_credentials=True))
|
|
result["cli_vars"] = deepcopy(self.cli_vars)
|
|
return result
|
|
|
|
def validate(self):
|
|
"""Validate the configuration against its contract.
|
|
|
|
:raises DbtProjectError: If the configuration fails validation.
|
|
"""
|
|
try:
|
|
Configuration.validate(self.serialize())
|
|
except ValidationError as e:
|
|
raise ConfigContractBrokenError(e) from e
|
|
|
|
# Called by RuntimeConfig.from_args
|
|
@classmethod
|
|
def collect_parts(cls: Type["RuntimeConfig"], args: Any) -> Tuple[Project, Profile]:
|
|
# profile_name from the project
|
|
project_root = args.project_dir if args.project_dir else os.getcwd()
|
|
cli_vars: Dict[str, Any] = getattr(args, "vars", {})
|
|
profile = cls.get_profile(
|
|
project_root,
|
|
cli_vars,
|
|
args,
|
|
)
|
|
flags = get_flags()
|
|
project = load_project(project_root, bool(flags.VERSION_CHECK), profile, cli_vars)
|
|
return project, profile
|
|
|
|
# Called in task/base.py, in BaseTask.from_args
|
|
@classmethod
|
|
def from_args(cls, args: Any) -> "RuntimeConfig":
|
|
"""Given arguments, read in dbt_project.yml from the current directory,
|
|
read in packages.yml if it exists, and use them to find the profile to
|
|
load.
|
|
|
|
:param args: The arguments as parsed from the cli.
|
|
:raises DbtProjectError: If the project is invalid or missing.
|
|
:raises DbtProfileError: If the profile is invalid or missing.
|
|
:raises DbtValidationError: If the cli variables are invalid.
|
|
"""
|
|
project, profile = cls.collect_parts(args)
|
|
|
|
return cls.from_parts(
|
|
project=project,
|
|
profile=profile,
|
|
args=args,
|
|
)
|
|
|
|
def get_metadata(self) -> ManifestMetadata:
|
|
return ManifestMetadata(
|
|
project_name=self.project_name,
|
|
project_id=self.hashed_name(),
|
|
user_id=tracking.active_user.id if tracking.active_user else None,
|
|
send_anonymous_usage_stats=(
|
|
get_flags().SEND_ANONYMOUS_USAGE_STATS if tracking.active_user else None
|
|
),
|
|
adapter_type=self.credentials.type,
|
|
)
|
|
|
|
def _get_v2_config_paths(
|
|
self,
|
|
config,
|
|
path: FQNPath,
|
|
paths: MutableSet[FQNPath],
|
|
) -> PathSet:
|
|
for key, value in config.items():
|
|
if isinstance(value, dict) and not key.startswith("+"):
|
|
self._get_config_paths(value, path + (key,), paths)
|
|
else:
|
|
paths.add(path)
|
|
return frozenset(paths)
|
|
|
|
def _get_config_paths(
|
|
self,
|
|
config: Dict[str, Any],
|
|
path: FQNPath = (),
|
|
paths: Optional[MutableSet[FQNPath]] = None,
|
|
) -> PathSet:
|
|
if paths is None:
|
|
paths = set()
|
|
|
|
for key, value in config.items():
|
|
if isinstance(value, dict) and not key.startswith("+"):
|
|
self._get_v2_config_paths(value, path + (key,), paths)
|
|
else:
|
|
paths.add(path)
|
|
return frozenset(paths)
|
|
|
|
def get_resource_config_paths(self) -> Dict[str, PathSet]:
|
|
"""Return a dictionary with resource type keys whose values are
|
|
lists of lists of strings, where each inner list of strings represents
|
|
a configured path in the resource.
|
|
"""
|
|
return {
|
|
"models": self._get_config_paths(self.models),
|
|
"seeds": self._get_config_paths(self.seeds),
|
|
"snapshots": self._get_config_paths(self.snapshots),
|
|
"sources": self._get_config_paths(self.sources),
|
|
"data_tests": self._get_config_paths(self.data_tests),
|
|
"unit_tests": self._get_config_paths(self.unit_tests),
|
|
"metrics": self._get_config_paths(self.metrics),
|
|
"semantic_models": self._get_config_paths(self.semantic_models),
|
|
"saved_queries": self._get_config_paths(self.saved_queries),
|
|
"exposures": self._get_config_paths(self.exposures),
|
|
}
|
|
|
|
def warn_for_unused_resource_config_paths(
|
|
self,
|
|
resource_fqns: Mapping[str, PathSet],
|
|
disabled: PathSet,
|
|
) -> None:
|
|
"""Return a list of lists of strings, where each inner list of strings
|
|
represents a type + FQN path of a resource configuration that is not
|
|
used.
|
|
"""
|
|
disabled_fqns = frozenset(tuple(fqn) for fqn in disabled)
|
|
resource_config_paths = self.get_resource_config_paths()
|
|
unused_resource_config_paths = []
|
|
for resource_type, config_paths in resource_config_paths.items():
|
|
used_fqns = resource_fqns.get(resource_type, frozenset())
|
|
fqns = used_fqns | disabled_fqns
|
|
|
|
for config_path in config_paths:
|
|
if not _is_config_used(config_path, fqns):
|
|
resource_path = ".".join(i for i in ((resource_type,) + config_path))
|
|
unused_resource_config_paths.append(resource_path)
|
|
|
|
if len(unused_resource_config_paths) == 0:
|
|
return
|
|
|
|
warn_or_error(UnusedResourceConfigPath(unused_config_paths=unused_resource_config_paths))
|
|
|
|
def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]:
|
|
if self.dependencies is None:
|
|
all_projects = {self.project_name: self}
|
|
internal_packages = get_include_paths(self.credentials.type)
|
|
if base_only:
|
|
# Test setup -- we want to load macros without dependencies
|
|
project_paths = itertools.chain(internal_packages)
|
|
else:
|
|
# raise exception if fewer installed packages than in packages.yml
|
|
count_packages_specified = len(self.packages.packages) # type: ignore
|
|
count_packages_installed = len(tuple(self._get_project_directories()))
|
|
if count_packages_specified > count_packages_installed:
|
|
raise UninstalledPackagesFoundError(
|
|
count_packages_specified,
|
|
count_packages_installed,
|
|
self.packages_specified_path,
|
|
self.packages_install_path,
|
|
)
|
|
project_paths = itertools.chain(internal_packages, self._get_project_directories())
|
|
for project_name, project in self.load_projects(project_paths):
|
|
if project_name in all_projects:
|
|
raise NonUniquePackageNameError(project_name)
|
|
all_projects[project_name] = project
|
|
self.dependencies = all_projects
|
|
return self.dependencies
|
|
|
|
def clear_dependencies(self):
|
|
self.dependencies = None
|
|
|
|
# Called by 'load_dependencies' in this class
|
|
def load_projects(self, paths: Iterable[Path]) -> Iterator[Tuple[str, "RuntimeConfig"]]:
|
|
for path in paths:
|
|
try:
|
|
project = self.new_project(str(path))
|
|
except DbtProjectError as e:
|
|
raise DbtProjectError(
|
|
f"Failed to read package: {e}",
|
|
result_type="invalid_project",
|
|
path=path,
|
|
) from e
|
|
else:
|
|
yield project.project_name, project
|
|
|
|
def _get_project_directories(self) -> Iterator[Path]:
|
|
root = Path(self.project_root) / self.packages_install_path
|
|
|
|
if root.exists():
|
|
for path in root.iterdir():
|
|
if path.is_dir() and not path.name.startswith("__"):
|
|
yield path
|
|
|
|
|
|
class UnsetCredentials(Credentials):
|
|
def __init__(self) -> None:
|
|
super().__init__("", "")
|
|
|
|
@property
|
|
def type(self):
|
|
return None
|
|
|
|
@property
|
|
def unique_field(self):
|
|
return None
|
|
|
|
def connection_info(self, *args, **kwargs):
|
|
return {}
|
|
|
|
def _connection_keys(self):
|
|
return ()
|
|
|
|
|
|
# This is used by commands which do not require
|
|
# a profile, i.e. dbt deps and clean
|
|
class UnsetProfile(Profile):
|
|
def __init__(self):
|
|
self.credentials = UnsetCredentials()
|
|
self.profile_name = ""
|
|
self.target_name = ""
|
|
self.threads = -1
|
|
|
|
def to_target_dict(self):
|
|
return DictDefaultEmptyStr({})
|
|
|
|
def __getattribute__(self, name):
|
|
if name in {"profile_name", "target_name", "threads"}:
|
|
raise DbtRuntimeError(f'Error: disallowed attribute "{name}" - no profile!')
|
|
|
|
return Profile.__getattribute__(self, name)
|
|
|
|
|
|
UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE = """\
|
|
Configuration paths exist in your dbt_project.yml file which do not \
|
|
apply to any resources.
|
|
There are {} unused configuration paths:
|
|
{}
|
|
"""
|
|
|
|
|
|
def _is_config_used(path, fqns):
|
|
if fqns:
|
|
for fqn in fqns:
|
|
if len(path) <= len(fqn) and fqn[: len(path)] == path:
|
|
return True
|
|
return False
|