mirror of
https://github.com/dbt-labs/dbt-core
synced 2025-12-23 00:21:27 +00:00
Compare commits
4 Commits
enable-pos
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2ff478ed27 | ||
|
|
b42429f966 | ||
|
|
401376f540 | ||
|
|
b4ab009112 |
@@ -29,11 +29,25 @@ from dbt.contracts.graph.compiled import CompiledSchemaTestNode
|
|||||||
from dbt.contracts.graph.parsed import ParsedSchemaTestNode
|
from dbt.contracts.graph.parsed import ParsedSchemaTestNode
|
||||||
from dbt.exceptions import (
|
from dbt.exceptions import (
|
||||||
InternalException, raise_compiler_error, CompilationException,
|
InternalException, raise_compiler_error, CompilationException,
|
||||||
invalid_materialization_argument, MacroReturn, JinjaRenderingException
|
invalid_materialization_argument, MacroReturn, JinjaRenderingException,
|
||||||
|
StaticAnalysisNotPossibleException
|
||||||
)
|
)
|
||||||
from dbt import flags
|
from dbt import flags
|
||||||
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
from dbt.logger import GLOBAL_LOGGER as logger # noqa
|
||||||
|
|
||||||
|
class Timer(object):
    """Running totals used to compare the two template-processing paths.

    ``old`` / ``new`` count how many times each path has run, and
    ``old_time`` / ``new_time`` accumulate the seconds spent in each.
    The values start out as class-level defaults of zero.
    """

    # Invocation counters for each path.
    old, new = 0, 0
    # Accumulated elapsed seconds for each path.
    old_time, new_time = 0, 0

    def debug(self):
        """Print both totals, but only on every 25th ``old`` count."""
        if self.old % 25:
            return

        # Leading empty entry reproduces the blank separator line.
        report = [
            "",
            f"OLD: {self.old} @ {self.old_time}s",
            f"NEW: {self.new} @ {self.new_time}s",
        ]
        print("\n".join(report))


# Module-wide instance shared by the benchmarking code.
timer = Timer()
|
||||||
|
|
||||||
def _linecache_inject(source, write):
|
def _linecache_inject(source, write):
|
||||||
if write:
|
if write:
|
||||||
@@ -459,7 +473,6 @@ TEXT_FILTERS: Dict[str, Callable[[Any], Any]] = {
|
|||||||
'as_number': lambda x: x,
|
'as_number': lambda x: x,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_environment(
|
def get_environment(
|
||||||
node=None,
|
node=None,
|
||||||
capture_macros: bool = False,
|
capture_macros: bool = False,
|
||||||
@@ -528,18 +541,6 @@ def render_template(template, ctx: Dict[str, Any], node=None) -> str:
|
|||||||
return template.render(ctx)
|
return template.render(ctx)
|
||||||
|
|
||||||
|
|
||||||
def _requote_result(raw_value: str, rendered: str) -> str:
    """Wrap ``rendered`` in the quote character that encloses ``raw_value``.

    If ``raw_value`` both starts and ends with a double quote, ``rendered``
    is returned wrapped in double quotes; otherwise the same check is made
    for single quotes. When neither applies, ``rendered`` is returned as-is.
    """
    wrapper = ''
    # Double quotes are checked first, matching the original precedence.
    for candidate in ('"', "'"):
        if raw_value.startswith(candidate) and raw_value.endswith(candidate):
            wrapper = candidate
            break
    return f'{wrapper}{rendered}{wrapper}'
|
|
||||||
|
|
||||||
|
|
||||||
# performance note: Local benchmarking (so take it with a big grain of salt!)
|
# performance note: Local benchmarking (so take it with a big grain of salt!)
|
||||||
# on this indicates that it is on average slightly slower than
|
# on this indicates that it is on average slightly slower than
|
||||||
# checking two separate patterns, but the standard deviation is smaller with
|
# checking two separate patterns, but the standard deviation is smaller with
|
||||||
@@ -562,7 +563,6 @@ def get_rendered(
|
|||||||
# native=True case by passing the input string to ast.literal_eval, like
|
# native=True case by passing the input string to ast.literal_eval, like
|
||||||
# the native renderer does.
|
# the native renderer does.
|
||||||
if (
|
if (
|
||||||
not native and
|
|
||||||
isinstance(string, str) and
|
isinstance(string, str) and
|
||||||
_HAS_RENDER_CHARS_PAT.search(string) is None
|
_HAS_RENDER_CHARS_PAT.search(string) is None
|
||||||
):
|
):
|
||||||
@@ -577,6 +577,84 @@ def get_rendered(
|
|||||||
return render_template(template, ctx, node)
|
return render_template(template, ctx, node)
|
||||||
|
|
||||||
|
|
||||||
|
def statically_extract_function_calls(string, ctx, node):
    """Find ``source``/``ref``/``config`` calls in a template and invoke them.

    The template is parsed into an AST (it is not rendered). Every call
    node in the AST must be a plain-name call to one of the three tracked
    functions, with arguments that can be evaluated as constants. The
    captured calls are then made against the callables of the same name
    found in ``ctx``.

    Calls are grouped by function name, so they are invoked in the order
    ``source``, ``ref``, ``config`` rather than in template order.

    NOTE(review): only call nodes are inspected; surrounding control flow
    (for example a call nested in an ``if`` block) is not taken into
    account here - confirm that this is acceptable for callers.

    :param string: The raw template text to analyze.
    :param ctx: Mapping that provides the ``source``/``ref``/``config``
        callables.
    :param node: Forwarded to ``get_environment``.
    :raises StaticAnalysisNotPossibleException: If the template calls
        anything other than the tracked functions, uses ``*args`` or
        ``**kwargs`` in a call, passes a non-constant argument, or calls a
        tracked function that is missing from ``ctx``.
    """
    env = get_environment(node, capture_macros=True)
    parsed = env.parse(string)

    captured_calls = {
        'source': [],
        'ref': [],
        'config': [],
    }

    for func_call in parsed.find_all(jinja2.nodes.Call):
        # Only plain-name calls such as `ref(...)` carry a `.name`. The
        # callee of an attribute or subscript call (`config.get(...)`)
        # does not, and must be treated as "unknown" rather than crash.
        func_name = getattr(func_call.node, 'name', None)

        # An unknown function was called - we cannot statically analyze
        if func_name not in captured_calls:
            raise StaticAnalysisNotPossibleException()

        # `*args` / `**kwargs` are only known once the template renders.
        if func_call.dyn_args is not None or func_call.dyn_kwargs is not None:
            raise StaticAnalysisNotPossibleException()

        try:
            call_args = [arg.as_const() for arg in func_call.args]
            call_kwargs = {
                kwarg.key: kwarg.value.as_const()
                for kwarg in func_call.kwargs
            }
        except jinja2.nodes.Impossible as exc:
            raise StaticAnalysisNotPossibleException() from exc

        captured_calls[func_name].append((call_args, call_kwargs))

    # Look up every callable we need *before* invoking any of them, so a
    # missing one cannot leave the context with partially applied calls.
    pending = []
    for func_name, arglist in captured_calls.items():
        if not arglist:
            continue

        func = ctx.get(func_name)
        # TODO : We could raise a smarter exception here (calling a method
        # that is not in the context is bad anyway), but for now,
        # let's just let the error bubble up through compilation
        if func is None:
            raise StaticAnalysisNotPossibleException()
        pending.append((func, arglist))

    # If we got here without raising, then we can just call the methods
    for func, arglist in pending:
        for args, kwargs in arglist:
            func(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_dag_edges_and_configs(
    string: str,
    ctx: Dict[str, Any],
    node=None,
):
    """Process a template via static analysis and via rendering, timing both.

    Static analysis of the template's AST is attempted first to extract
    sources, refs, configs, etc. The template is then rendered as well:
    as a fallback when static analysis is not possible, and - for now -
    also when it succeeds, so that the two approaches can be benchmarked
    against each other. The module-level ``timer`` accumulates counts and
    elapsed seconds for each path and prints a periodic report.

    NOTE(review): because both paths run against the same ``ctx``, calls
    made by the template are presumably executed twice whenever static
    analysis succeeds - confirm before relying on the collected results.

    :param string: The raw template text.
    :param ctx: The context used for both paths.
    :param node: Forwarded to both paths.
    """
    import time

    # perf_counter is monotonic, so the measured intervals cannot be
    # skewed by system clock adjustments.
    start = time.perf_counter()
    try:
        statically_extract_function_calls(string, ctx, node)
    except StaticAnalysisNotPossibleException:
        # Not statically analyzable: only the rendering path is recorded.
        pass
    else:
        timer.new_time += time.perf_counter() - start
        timer.new += 1

    # Benchmarking: always render, so that we run it both ways.
    start = time.perf_counter()
    get_rendered(string, ctx, node, capture_macros=True)
    timer.old_time += time.perf_counter() - start
    timer.old += 1

    timer.debug()
|
||||||
|
|
||||||
|
|
||||||
def undefined_error(msg) -> NoReturn:
    """Raise a jinja2 ``UndefinedError`` built from ``msg``; never returns."""
    error = jinja2.exceptions.UndefinedError(msg)
    raise error
|
||||||
|
|
||||||
|
|||||||
@@ -417,7 +417,8 @@ class ParseRefResolver(BaseRefResolver):
|
|||||||
) -> RelationProxy:
|
) -> RelationProxy:
|
||||||
self.model.refs.append(self._repack_args(name, package))
|
self.model.refs.append(self._repack_args(name, package))
|
||||||
|
|
||||||
return self.Relation.create_from(self.config, self.model)
|
# Big takeaway: this is _very_ slow
|
||||||
|
#return self.Relation.create_from(self.config, self.model)
|
||||||
|
|
||||||
|
|
||||||
ResolveRef = Union[Disabled, ManifestNode]
|
ResolveRef = Union[Disabled, ManifestNode]
|
||||||
@@ -495,6 +496,8 @@ class ParseSourceResolver(BaseSourceResolver):
|
|||||||
def resolve(self, source_name: str, table_name: str):
    """Record a ``source()`` reference on the model and return a relation.

    The ``[source_name, table_name]`` pair is appended to
    ``self.model.sources``; the return value is whatever
    ``self.Relation.create_from`` produces for this config and model.
    """
    # When you call source(), this is what happens at parse time
    source_key = [source_name, table_name]
    self.model.sources.append(source_key)

    # Big takeaway: this is very, very slow
    relation = self.Relation.create_from(self.config, self.model)
    return relation
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ class MacroReturn(builtins.BaseException):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
|
|
||||||
|
class StaticAnalysisNotPossibleException(Exception):
    """Signal that something cannot be handled by static analysis.

    This is an ordinary, recoverable condition, so the class derives from
    ``Exception`` rather than ``BaseException``: a stray instance is then
    handled by generic ``except Exception`` error handling instead of
    bypassing it. Handlers that catch this class by name are unaffected.
    """
|
||||||
|
|
||||||
class InternalException(Exception):
|
class InternalException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from dbt.context.providers import (
|
|||||||
generate_generate_component_name_macro,
|
generate_generate_component_name_macro,
|
||||||
)
|
)
|
||||||
from dbt.adapters.factory import get_adapter
|
from dbt.adapters.factory import get_adapter
|
||||||
from dbt.clients.jinja import get_rendered
|
from dbt.clients.jinja import get_rendered, get_dag_edges_and_configs
|
||||||
from dbt.config import Project, RuntimeConfig
|
from dbt.config import Project, RuntimeConfig
|
||||||
from dbt.context.context_config import (
|
from dbt.context.context_config import (
|
||||||
ContextConfig
|
ContextConfig
|
||||||
@@ -282,12 +282,15 @@ class ConfiguredParser(
|
|||||||
"""
|
"""
|
||||||
# during parsing, we don't have a connection, but we might need one, so
|
# during parsing, we don't have a connection, but we might need one, so
|
||||||
# we have to acquire it.
|
# we have to acquire it.
|
||||||
with get_adapter(self.root_project).connection_for(parsed_node):
|
# TODO: What happens if we don't have an adapter during parsing??
|
||||||
context = self._context_for(parsed_node, config)
|
# - What does calling `connection_for` actually do to the context here?
|
||||||
|
#with get_adapter(self.root_project).connection_for(parsed_node):
|
||||||
|
context = self._context_for(parsed_node, config)
|
||||||
|
|
||||||
get_rendered(
|
#get_rendered(
|
||||||
parsed_node.raw_sql, context, parsed_node, capture_macros=True
|
# parsed_node.raw_sql, context, parsed_node, capture_macros=True
|
||||||
)
|
#)
|
||||||
|
get_dag_edges_and_configs(parsed_node.raw_sql, context, parsed_node)
|
||||||
|
|
||||||
def update_parsed_node_config(
|
def update_parsed_node_config(
|
||||||
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
|
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
|
||||||
|
|||||||
Reference in New Issue
Block a user