Compare commits

...

4 Commits

Author SHA1 Message Date
Drew Banin
2ff478ed27 update 2020-12-07 10:30:50 -05:00
Drew Banin
b42429f966 make it work 2020-12-06 14:53:17 -05:00
Drew Banin
401376f540 Merge branch 'dev/kiyoshi-kuromiya' into feat/perf 2020-12-06 13:46:53 -05:00
Drew Banin
b4ab009112 not working 2020-10-03 11:13:09 -04:00
4 changed files with 109 additions and 22 deletions

View File

@@ -29,11 +29,25 @@ from dbt.contracts.graph.compiled import CompiledSchemaTestNode
from dbt.contracts.graph.parsed import ParsedSchemaTestNode
from dbt.exceptions import (
InternalException, raise_compiler_error, CompilationException,
invalid_materialization_argument, MacroReturn, JinjaRenderingException
invalid_materialization_argument, MacroReturn, JinjaRenderingException,
StaticAnalysisNotPossibleException
)
from dbt import flags
from dbt.logger import GLOBAL_LOGGER as logger # noqa
class Timer(object):
    """Ad-hoc counters for comparing two code paths labelled OLD and NEW.

    Each path has a call counter and an accumulated duration. Nothing in this
    class updates the values; callers increment them directly on an instance.
    """

    # Call counters for the two paths.
    old = 0
    new = 0
    # Accumulated durations; debug() prints them with an "s" suffix.
    old_time = 0
    new_time = 0

    def debug(self):
        """Print both counters and durations whenever ``old`` is a multiple of 25."""
        if self.old % 25 != 0:
            return

        report = (
            f"OLD: {self.old} @ {self.old_time}s",
            f"NEW: {self.new} @ {self.new_time}s",
        )
        # Blank separator line first, then one line per path.
        print()
        for line in report:
            print(line)


# Module-level instance shared by the timing code in this module.
timer = Timer()
def _linecache_inject(source, write):
if write:
@@ -459,7 +473,6 @@ TEXT_FILTERS: Dict[str, Callable[[Any], Any]] = {
'as_number': lambda x: x,
}
def get_environment(
node=None,
capture_macros: bool = False,
@@ -528,18 +541,6 @@ def render_template(template, ctx: Dict[str, Any], node=None) -> str:
return template.render(ctx)
def _requote_result(raw_value: str, rendered: str) -> str:
double_quoted = raw_value.startswith('"') and raw_value.endswith('"')
single_quoted = raw_value.startswith("'") and raw_value.endswith("'")
if double_quoted:
quote_char = '"'
elif single_quoted:
quote_char = "'"
else:
quote_char = ''
return f'{quote_char}{rendered}{quote_char}'
# performance note: Local benchmarking (so take it with a big grain of salt!)
# on this indicates that it is on average slightly slower than
# checking two separate patterns, but the standard deviation is smaller with
@@ -562,7 +563,6 @@ def get_rendered(
# native=True case by passing the input string to ast.literal_eval, like
# the native renderer does.
if (
not native and
isinstance(string, str) and
_HAS_RENDER_CHARS_PAT.search(string) is None
):
@@ -577,6 +577,84 @@ def get_rendered(
return render_template(template, ctx, node)
def statically_extract_function_calls(string, ctx, node):
    """Invoke the ``source``/``ref``/``config`` calls of a template without rendering it.

    The template text is parsed into a Jinja AST and every call node in it is
    inspected. When all calls target ``source``, ``ref`` or ``config`` with
    constant arguments, the matching callables are looked up in ``ctx`` and
    invoked with those arguments, grouped by function name (all ``source``
    calls, then ``ref``, then ``config``).

    NOTE(review): every call node in the AST is collected regardless of the
    control flow around it, so a call inside a branch that rendering would
    skip is still invoked here - confirm this is acceptable.

    :param string: raw template text to analyze.
    :param ctx: mapping that provides the ``source``, ``ref`` and ``config``
        callables.
    :param node: passed through to ``get_environment``.
    :raises StaticAnalysisNotPossibleException: if the template calls anything
        other than a bare ``source``/``ref``/``config`` name, uses ``*args`` or
        ``**kwargs`` expansion, passes a non-constant argument, or if ``ctx``
        lacks one of the three callables.
    """
    env = get_environment(node, capture_macros=True)
    parsed = env.parse(string)

    captured_calls = {
        'source': [],
        'ref': [],
        'config': [],
    }

    for func_call in parsed.find_all(jinja2.nodes.Call):
        callee = func_call.node
        # Only a bare name such as ``ref(...)`` can be matched. Attribute or
        # subscript callees (``adapter.dispatch(...)``) carry no ``name`` and
        # must be reported as not analyzable rather than raise AttributeError.
        if not isinstance(callee, jinja2.nodes.Name):
            raise StaticAnalysisNotPossibleException()

        func_name = callee.name
        # An unknown function was called - we cannot statically analyze
        if func_name not in captured_calls:
            raise StaticAnalysisNotPossibleException()

        # ``*args`` / ``**kwargs`` expansions live outside ``args``/``kwargs``
        # and would otherwise be silently dropped.
        if func_call.dyn_args is not None or func_call.dyn_kwargs is not None:
            raise StaticAnalysisNotPossibleException()

        try:
            call_args = [arg.as_const() for arg in func_call.args]
            call_kwargs = {
                kwarg.key: kwarg.value.as_const()
                for kwarg in func_call.kwargs
            }
        except jinja2.nodes.Impossible as exc:
            # At least one argument is not a compile-time constant.
            raise StaticAnalysisNotPossibleException() from exc

        captured_calls[func_name].append((call_args, call_kwargs))

    # Resolve every callable before invoking any of them, so that a missing
    # one aborts the analysis before any call has had side effects.
    # TODO : We could raise a smarter exception here (calling a method
    #        that is not in the context is bad anyway), but for now,
    #        let's just let the error bubble up through compilation
    resolved = []
    for func_name, arglist in captured_calls.items():
        func = ctx.get(func_name)
        if func is None:
            raise StaticAnalysisNotPossibleException()
        resolved.append((func, arglist))

    # If we got here without raising, then we can just call the methods
    for func, arglist in resolved:
        for args, kwargs in arglist:
            func(*args, **kwargs)
def get_dag_edges_and_configs(
    string: str,
    ctx: Dict[str, Any],
    node=None,
) -> None:
    """Run static extraction and a full render on ``string``, timing both.

    Static analysis of the template AST is attempted first to extract
    sources, refs, configs, etc. Its duration and call count are added to
    ``timer.new_time`` / ``timer.new`` only when it completes without raising
    ``StaticAnalysisNotPossibleException``.

    HACK: the template is then always rendered with ``get_rendered`` whether
    or not static analysis succeeded, so that both paths are measured.
    NOTE(review): when static analysis succeeds, the same ``ctx`` is used by
    both paths, so callables in it are presumably invoked twice - confirm
    before relying on the results outside of benchmarking.

    :param string: raw template text.
    :param ctx: rendering context, passed to both code paths.
    :param node: passed through to both code paths.
    """
    # Local import kept from the original; perf_counter is a monotonic clock
    # meant for measuring short intervals, unlike the wall clock time.time().
    import time

    try:
        start = time.perf_counter()
        statically_extract_function_calls(string, ctx, node)
        timer.new_time += time.perf_counter() - start
        timer.new += 1
    except StaticAnalysisNotPossibleException:
        # Not analyzable: the render below is the fallback for accuracy.
        pass

    # Hack to make sure that we run it both ways
    start = time.perf_counter()
    get_rendered(string, ctx, node, capture_macros=True)
    timer.old_time += time.perf_counter() - start
    timer.old += 1

    timer.debug()
def undefined_error(msg) -> NoReturn:
    """Raise ``jinja2.exceptions.UndefinedError`` carrying ``msg``; never returns."""
    error = jinja2.exceptions.UndefinedError(msg)
    raise error

View File

@@ -417,7 +417,8 @@ class ParseRefResolver(BaseRefResolver):
) -> RelationProxy:
self.model.refs.append(self._repack_args(name, package))
return self.Relation.create_from(self.config, self.model)
# Big takeaway: this is _very_ slow
#return self.Relation.create_from(self.config, self.model)
ResolveRef = Union[Disabled, ManifestNode]
@@ -495,6 +496,8 @@ class ParseSourceResolver(BaseSourceResolver):
def resolve(self, source_name: str, table_name: str):
    """Record a ``source()`` call made at parse time and return a relation.

    Appends ``[source_name, table_name]`` to ``self.model.sources`` and then
    returns the result of ``self.Relation.create_from(self.config, self.model)``.
    """
    # When you call source(), this is what happens at parse time
    source_key = [source_name, table_name]
    self.model.sources.append(source_key)
    # Big takeaway: this is very, very slow
    relation = self.Relation.create_from(self.config, self.model)
    return relation

View File

@@ -41,6 +41,9 @@ class MacroReturn(builtins.BaseException):
self.value = value
class StaticAnalysisNotPossibleException(builtins.BaseException):
    """Signal that a template cannot be handled by static analysis.

    Derives from ``BaseException`` rather than ``Exception``, so handlers
    written as ``except Exception`` do not intercept it.
    """
class InternalException(Exception):
    """Plain ``Exception`` subclass that adds no behavior of its own."""

View File

@@ -15,7 +15,7 @@ from dbt.context.providers import (
generate_generate_component_name_macro,
)
from dbt.adapters.factory import get_adapter
from dbt.clients.jinja import get_rendered
from dbt.clients.jinja import get_rendered, get_dag_edges_and_configs
from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import (
ContextConfig
@@ -282,12 +282,15 @@ class ConfiguredParser(
"""
# during parsing, we don't have a connection, but we might need one, so
# we have to acquire it.
with get_adapter(self.root_project).connection_for(parsed_node):
context = self._context_for(parsed_node, config)
# TODO: What happens if we don't have an adapter during parsing??
# - What does calling `connection_for` actually do to the context here?
#with get_adapter(self.root_project).connection_for(parsed_node):
context = self._context_for(parsed_node, config)
get_rendered(
parsed_node.raw_sql, context, parsed_node, capture_macros=True
)
#get_rendered(
# parsed_node.raw_sql, context, parsed_node, capture_macros=True
#)
get_dag_edges_and_configs(parsed_node.raw_sql, context, parsed_node)
def update_parsed_node_config(
self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]