update

make it work
Merge branch 'dev/kiyoshi-kuromiya' into feat/perf
2025-12-19 06:31:27 +00:00 · 2020-12-07 10:30:50 -05:00 · 2020-12-06 14:53:17 -05:00 · 2020-12-06 13:46:53 -05:00 · 2020-10-03 11:13:09 -04:00
4 changed files with 109 additions and 22 deletions
--- a/core/dbt/clients/jinja.py
+++ b/core/dbt/clients/jinja.py
@@ -29,11 +29,25 @@ from dbt.contracts.graph.compiled import CompiledSchemaTestNode
 from dbt.contracts.graph.parsed import ParsedSchemaTestNode
 from dbt.exceptions import (
    InternalException, raise_compiler_error, CompilationException,
-    invalid_materialization_argument, MacroReturn, JinjaRenderingException
+    invalid_materialization_argument, MacroReturn, JinjaRenderingException,
+    StaticAnalysisNotPossibleException
 )
 from dbt import flags
 from dbt.logger import GLOBAL_LOGGER as logger  # noqa

+class Timer:
+    """Running tallies for comparing two ways of processing a template.
+
+    ``old`` / ``new`` are call counts and ``old_time`` / ``new_time`` are
+    the matching accumulated durations in seconds. All four start out as
+    class-level zeros.
+    """

+    old = 0
+    new = 0
+    old_time = 0
+    new_time = 0

+    def debug(self):
+        """Print both tallies, but only when ``old`` is a multiple of 25."""
+        if self.old % 25 != 0:
+            return

+        print()
+        print(f"OLD: {self.old} @ {self.old_time}s")
+        print(f"NEW: {self.new} @ {self.new_time}s")


+# Module-level instance shared by everything that records timings.
+timer = Timer()

 def _linecache_inject(source, write):
    if write:
@@ -459,7 +473,6 @@ TEXT_FILTERS: Dict[str, Callable[[Any], Any]] = {
    'as_number': lambda x: x,
 }

-
 def get_environment(
    node=None,
    capture_macros: bool = False,
@@ -528,18 +541,6 @@ def render_template(template, ctx: Dict[str, Any], node=None) -> str:
        return template.render(ctx)


-def _requote_result(raw_value: str, rendered: str) -> str:
-    double_quoted = raw_value.startswith('"') and raw_value.endswith('"')
-    single_quoted = raw_value.startswith("'") and raw_value.endswith("'")
-    if double_quoted:
-        quote_char = '"'
-    elif single_quoted:
-        quote_char = "'"
-    else:
-        quote_char = ''
-    return f'{quote_char}{rendered}{quote_char}'
-
-
 # performance note: Local benchmarking (so take it with a big grain of salt!)
 # on this indicates that it is on average slightly slower than
 # checking two separate patterns, but the standard deviation is smaller with
@@ -562,7 +563,6 @@ def get_rendered(
    # native=True case by passing the input string to ast.literal_eval, like
    # the native renderer does.
    if (
-        not native and
        isinstance(string, str) and
        _HAS_RENDER_CHARS_PAT.search(string) is None
    ):
@@ -577,6 +577,84 @@ def get_rendered(
    return render_template(template, ctx, node)


+def statically_extract_function_calls(string, ctx, node):
+    """Replay a template's ``source``/``ref``/``config`` calls without
+    rendering it.
+
+    The template is parsed into a Jinja AST and every call expression in
+    it is inspected. When all of them are calls to a bare ``source``,
+    ``ref`` or ``config`` name with constant arguments, the matching
+    callables from ``ctx`` are invoked with those arguments.
+
+    :param string: The raw template text to analyze.
+    :param ctx: Mapping of context names to callables.
+    :param node: Passed through to ``get_environment``.
+    :raises StaticAnalysisNotPossibleException: If the template calls
+        anything other than a bare ``source``/``ref``/``config`` name,
+        passes ``*args``/``**kwargs`` or non-constant arguments, or
+        calls a function that is missing from ``ctx``.
+    """
+    env = get_environment(node, capture_macros=True)
+    parsed = env.parse(string)

+    captured_calls = {
+        'source': [],
+        'ref': [],
+        'config': [],
+    }

+    for func_call in parsed.find_all(jinja2.nodes.Call):
+        callee = func_call.node

+        # Only a bare name can be matched against the known functions.
+        # Attribute or subscript callees (``adapter.dispatch(...)``) are
+        # not Name nodes and have no ``.name`` attribute to read.
+        if not isinstance(callee, jinja2.nodes.Name):
+            raise StaticAnalysisNotPossibleException()

+        func_name = callee.name

+        # An unknown function was called - we cannot statically analyze
+        if func_name not in captured_calls:
+            raise StaticAnalysisNotPossibleException()

+        # ``*args`` / ``**kwargs`` are only known at render time, so a
+        # call that uses them cannot be replayed from constants.
+        uses_star_args = (
+            func_call.dyn_args is not None or
+            func_call.dyn_kwargs is not None
+        )
+        if uses_star_args:
+            raise StaticAnalysisNotPossibleException()

+        try:
+            call_args = [arg.as_const() for arg in func_call.args]
+            call_kwargs = {
+                kwarg.key: kwarg.value.as_const()
+                for kwarg in func_call.kwargs
+            }
+        except jinja2.nodes.Impossible:
+            # At least one argument is not a compile-time constant
+            raise StaticAnalysisNotPossibleException()

+        captured_calls[func_name].append((call_args, call_kwargs))

+    # Look up every function that was actually called before invoking any
+    # of them, so that a missing one is reported before any side effects
+    # have happened. Functions that were never called are not required.
+    # TODO : We could raise a smarter exception here (calling a method
+    #        that is not in the context is bad anyway).
+    pending = []
+    for func_name, arglist in captured_calls.items():
+        if not arglist:
+            continue

+        func = ctx.get(func_name)
+        if func is None:
+            raise StaticAnalysisNotPossibleException()

+        pending.append((func, arglist))

+    # If we got here without raising, then we can just call the methods
+    for func, arglist in pending:
+        for args, kwargs in arglist:
+            func(*args, **kwargs)
+
+
+def get_dag_edges_and_configs(
+    string: str,
+    ctx: Dict[str, Any],
+    node=None,
+):
+    """Run static call extraction and a full render of ``string``, timing
+    each one on the module-level ``timer``.
+
+    Static extraction is attempted first; when it completes, its duration
+    is added to ``timer.new_time`` and ``timer.new`` is incremented. A
+    ``StaticAnalysisNotPossibleException`` is then raised unconditionally,
+    so the render path in the handler runs on every call and its duration
+    is added to ``timer.old_time`` / ``timer.old``. ``timer.debug()`` is
+    called last. Nothing is returned.
+
+    Only ``StaticAnalysisNotPossibleException`` is caught here; any other
+    exception from either path propagates to the caller.
+
+    NOTE(review): both paths receive the same ``ctx``, so a template that
+    passes static analysis presumably has its ``ref``/``source``/``config``
+    callables invoked twice - confirm that callers tolerate this.
+    """
+    # Try to statically analyze an AST to extract sources, refs,
+    # configs, etc. If that's not possible, then just render the
+    # template for accuracy.
+    import time

+    try:
+        start = time.time()
+        statically_extract_function_calls(string, ctx, node)
+        # Only reached when static extraction did not raise, so the "new"
+        # tallies cover successful static runs only.
+        timer.new_time += time.time() - start
+        timer.new += 1

+        # Hack to make sure that we run it both ways
+        raise StaticAnalysisNotPossibleException()

+    except StaticAnalysisNotPossibleException:
+        # Reached on every call: either static analysis gave up, or the
+        # forced raise above fired after it succeeded.
+        start = time.time()
+        get_rendered(string, ctx, node, capture_macros=True)
+        timer.old_time += time.time() - start
+        timer.old += 1

+    timer.debug()
+
+
 def undefined_error(msg) -> NoReturn:
    raise jinja2.exceptions.UndefinedError(msg)

--- a/core/dbt/context/providers.py
+++ b/core/dbt/context/providers.py
@@ -417,7 +417,8 @@ class ParseRefResolver(BaseRefResolver):
    ) -> RelationProxy:
        self.model.refs.append(self._repack_args(name, package))

-        return self.Relation.create_from(self.config, self.model)
+        # Big takeaway: this is _very_ slow
+        #return self.Relation.create_from(self.config, self.model)


 ResolveRef = Union[Disabled, ManifestNode]
@@ -495,6 +496,8 @@ class ParseSourceResolver(BaseSourceResolver):
    def resolve(self, source_name: str, table_name: str):
        # When you call source(), this is what happens at parse time
        self.model.sources.append([source_name, table_name])
+
+        # Big takeaway: this is very, very slow
        return self.Relation.create_from(self.config, self.model)


--- a/core/dbt/exceptions.py
+++ b/core/dbt/exceptions.py
@@ -41,6 +41,9 @@ class MacroReturn(builtins.BaseException):
        self.value = value


+class StaticAnalysisNotPossibleException(builtins.BaseException):
+    """Signals that a template cannot be analyzed without rendering it.
+
+    Derives from ``BaseException`` rather than ``Exception``, so handlers
+    written as ``except Exception`` do not catch it.
+    """
+
 class InternalException(Exception):
    pass

--- a/core/dbt/parser/base.py
+++ b/core/dbt/parser/base.py
@@ -15,7 +15,7 @@ from dbt.context.providers import (
    generate_generate_component_name_macro,
 )
 from dbt.adapters.factory import get_adapter
-from dbt.clients.jinja import get_rendered
+from dbt.clients.jinja import get_rendered, get_dag_edges_and_configs
 from dbt.config import Project, RuntimeConfig
 from dbt.context.context_config import (
    ContextConfig
@@ -282,12 +282,15 @@ class ConfiguredParser(
        """
        # during parsing, we don't have a connection, but we might need one, so
        # we have to acquire it.
-        with get_adapter(self.root_project).connection_for(parsed_node):
-            context = self._context_for(parsed_node, config)
+        # TODO: What happens if we don't have an adapter during parsing??
+        #   - What does calling `connection_for` actually do to the context here?
+        #with get_adapter(self.root_project).connection_for(parsed_node):
+        context = self._context_for(parsed_node, config)

-            get_rendered(
-                parsed_node.raw_sql, context, parsed_node, capture_macros=True
-            )
+        #get_rendered(
+        #    parsed_node.raw_sql, context, parsed_node, capture_macros=True
+        #)
+        get_dag_edges_and_configs(parsed_node.raw_sql, context, parsed_node)

    def update_parsed_node_config(
        self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
Author	SHA1	Message	Date
Drew Banin	2ff478ed27	update	2020-12-07 10:30:50 -05:00
Drew Banin	b42429f966	make it work	2020-12-06 14:53:17 -05:00
Drew Banin	401376f540	Merge branch 'dev/kiyoshi-kuromiya' into feat/perf	2020-12-06 13:46:53 -05:00
Drew Banin	b4ab009112	not working	2020-10-03 11:13:09 -04:00