fixes #6463: Set Variable Parsing for SparkSQL and Databricks (#6464)

Co-authored-by: F STG <fritz.steingrube@extern.stromnetz-hamburg.de>
This commit is contained in:
F
2024-11-25 23:11:12 +01:00
committed by GitHub
parent 8e26bd4ff7
commit 7783c3014e
7 changed files with 434 additions and 0 deletions

View File

@@ -238,6 +238,10 @@ databricks_dialect.add(
NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"),
MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"),
MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"),
VariableNameIdentifierSegment=OneOf(
Ref("NakedIdentifierSegment"),
Ref("BackQuotedIdentifierSegment"),
),
)
databricks_dialect.replace(
@@ -1578,3 +1582,47 @@ class MagicCellStatementSegment(BaseSegment):
terminators=[Ref("CommandCellSegment", optional=True)],
reset_terminators=True,
)
class SetVariableStatementSegment(BaseSegment):
    """A `SET VARIABLE` statement used to set session variables.

    Two forms are supported:

    * one or more comma-separated assignments, each of the shape
      ``name = { expression | DEFAULT }``;
    * a bracketed list of variable names assigned from a bracketed query
      (a ``SELECT`` statement or a ``VALUES`` clause).

    https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-set-variable.html
    """

    type = "set_variable_statement"

    # set var v1=val, v2=val2;
    # NOTE: `Delimited` treats multiple positional elements as *alternatives*
    # for each delimited item, not as a sequence. Each assignment therefore
    # has to be wrapped in an explicit `Sequence`, otherwise the whole
    # `name = value` text is swallowed by the expression alternative and
    # parsed as a comparison.
    set_kv_pair = Delimited(
        Sequence(
            Ref("VariableNameIdentifierSegment"),
            Ref("EqualsSegment"),
            OneOf("DEFAULT", OptionallyBracketed(Ref("ExpressionSegment"))),
        ),
    )

    # set var (v1,v2) = (values(100,200))
    set_bracketed = Sequence(
        Bracketed(
            # The bracketed target list may name several variables.
            Delimited(Ref("VariableNameIdentifierSegment")),
        ),
        Ref("EqualsSegment"),
        Bracketed(
            OneOf(
                Ref("SelectStatementSegment"),
                Ref("ValuesClauseSegment"),
            )
        ),
    )

    match_grammar = Sequence(
        "SET",
        OneOf(
            "VAR",
            "VARIABLE",
        ),
        OneOf(
            set_kv_pair,
            set_bracketed,
        ),
        allow_gaps=True,
    )

View File

@@ -2804,6 +2804,7 @@ class StatementSegment(ansi.StatementSegment):
Ref("CreateWidgetStatementSegment"),
Ref("RemoveWidgetStatementSegment"),
Ref("ReplaceTableStatementSegment"),
Ref("SetVariableStatementSegment"),
],
remove=[
Ref("TransactionStatementSegment"),
@@ -3558,3 +3559,27 @@ class FrameClauseSegment(ansi.FrameClauseSegment):
Ref("FrameClauseUnitGrammar"),
OneOf(_frame_extent, Sequence("BETWEEN", _frame_extent, "AND", _frame_extent)),
)
class SetVariableStatementSegment(BaseSegment):
    """Grammar for the `SET VAR` / `SET VARIABLE` session variable statement.

    https://spark.apache.org/docs/4.0.0-preview2/sql-ref-syntax-aux-set-var.html
    """

    type = "set_variable_statement"

    # Either spelling of the keyword may follow SET.
    _variable_keyword = OneOf("VAR", "VARIABLE")

    # Assignment target: a comma-separated identifier list, with or without
    # surrounding brackets.
    _assignment_targets = OptionallyBracketed(
        Delimited(Ref("SingleIdentifierGrammar")),
    )

    # Assigned value: the DEFAULT keyword, or an expression that may itself
    # be wrapped in brackets.
    _assigned_value = OneOf(
        "DEFAULT",
        OptionallyBracketed(Ref("ExpressionSegment")),
    )

    match_grammar = Sequence(
        "SET",
        _variable_keyword,
        _assignment_targets,
        Ref("EqualsSegment"),
        _assigned_value,
        allow_gaps=True,
    )

View File

@@ -278,6 +278,8 @@ UNRESERVED_KEYWORDS = [
"UPDATE",
"USE",
"VALUES",
"VAR",
"VARIABLE",
"VARIANT",
"VIEW",
"VIEWS",

View File

@@ -0,0 +1,20 @@
-- simple assignment
SET VAR var1 = 5;
-- A complex expression assignment
SET VARIABLE var1 = (SELECT max(c1) FROM VALUES(1), (2) AS t(c1));
-- resetting the variable to DEFAULT (set in declare)
SET VAR var1 = DEFAULT;
-- A multi variable assignment
SET VAR (var1, var2, var3) = (VALUES(100,'x123',DEFAULT));
-- escpaed function name
SET VARIABLE `foo` = select 'bar';
-- function call
set var tz = current_timezone();
-- set multiple vars in one statement
set var x1 = 12, x2 = 'helloworld';

View File

@@ -0,0 +1,169 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: febbfaa5d5e7156ca3b9bf06e923cfde3f62aafb1e4a654985062f5eb155b753
file:
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- expression:
column_reference:
naked_identifier: var1
comparison_operator:
raw_comparison_operator: '='
numeric_literal: '5'
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VARIABLE
- expression:
column_reference:
naked_identifier: var1
comparison_operator:
raw_comparison_operator: '='
bracketed:
start_bracket: (
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
function:
function_name:
function_name_identifier: max
function_contents:
bracketed:
start_bracket: (
expression:
column_reference:
naked_identifier: c1
end_bracket: )
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
values_clause:
- keyword: VALUES
- bracketed:
start_bracket: (
expression:
numeric_literal: '1'
end_bracket: )
- comma: ','
- bracketed:
start_bracket: (
expression:
numeric_literal: '2'
end_bracket: )
- alias_expression:
keyword: AS
naked_identifier: t
bracketed:
start_bracket: (
identifier_list:
naked_identifier: c1
end_bracket: )
end_bracket: )
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- expression:
- column_reference:
naked_identifier: var1
- comparison_operator:
raw_comparison_operator: '='
- column_reference:
naked_identifier: DEFAULT
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- expression:
- bracketed:
- start_bracket: (
- column_reference:
naked_identifier: var1
- comma: ','
- column_reference:
naked_identifier: var2
- comma: ','
- column_reference:
naked_identifier: var3
- end_bracket: )
- comparison_operator:
raw_comparison_operator: '='
- bracketed:
start_bracket: (
values_clause:
keyword: VALUES
bracketed:
- start_bracket: (
- expression:
numeric_literal: '100'
- comma: ','
- expression:
quoted_literal: "'x123'"
- comma: ','
- expression:
column_reference:
naked_identifier: DEFAULT
- end_bracket: )
end_bracket: )
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VARIABLE
- expression:
column_reference:
quoted_identifier: '`foo`'
comparison_operator:
raw_comparison_operator: '='
select_statement:
select_clause:
keyword: select
select_clause_element:
quoted_literal: "'bar'"
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: set
- keyword: var
- expression:
column_reference:
naked_identifier: tz
comparison_operator:
raw_comparison_operator: '='
function:
function_name:
function_name_identifier: current_timezone
function_contents:
bracketed:
start_bracket: (
end_bracket: )
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: set
- keyword: var
- expression:
column_reference:
naked_identifier: x1
comparison_operator:
raw_comparison_operator: '='
numeric_literal: '12'
- comma: ','
- expression:
column_reference:
naked_identifier: x2
comparison_operator:
raw_comparison_operator: '='
quoted_literal: "'helloworld'"
- statement_terminator: ;

View File

@@ -0,0 +1,11 @@
-- simple assignment
SET VAR var1 = 5;
-- A complex expression assignment
SET VARIABLE var1 = (SELECT max(c1) FROM VALUES(1), (2) AS t(c1));
-- resetting the variable to DEFAULT (set in declare)
SET VAR var1 = DEFAULT;
-- A multi variable assignment
SET VAR (var1, var2) = (SELECT max(c1), CAST(min(c1) AS STRING) FROM VALUES(1), (2) AS t(c1));

View File

@@ -0,0 +1,159 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 32f6d0b6c86fb05fa63ca59145152c73a3f238fc4105613e546890be04b7f30c
file:
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- naked_identifier: var1
- comparison_operator:
raw_comparison_operator: '='
- expression:
numeric_literal: '5'
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VARIABLE
- naked_identifier: var1
- comparison_operator:
raw_comparison_operator: '='
- expression:
bracketed:
start_bracket: (
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
function:
function_name:
function_name_identifier: max
function_contents:
bracketed:
start_bracket: (
expression:
column_reference:
naked_identifier: c1
end_bracket: )
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
values_clause:
- keyword: VALUES
- bracketed:
start_bracket: (
expression:
numeric_literal: '1'
end_bracket: )
- comma: ','
- bracketed:
start_bracket: (
expression:
numeric_literal: '2'
end_bracket: )
- alias_expression:
keyword: AS
naked_identifier: t
bracketed:
start_bracket: (
identifier_list:
naked_identifier: c1
end_bracket: )
end_bracket: )
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- naked_identifier: var1
- comparison_operator:
raw_comparison_operator: '='
- keyword: DEFAULT
- statement_terminator: ;
- statement:
set_variable_statement:
- keyword: SET
- keyword: VAR
- bracketed:
- start_bracket: (
- naked_identifier: var1
- comma: ','
- naked_identifier: var2
- end_bracket: )
- comparison_operator:
raw_comparison_operator: '='
- expression:
bracketed:
start_bracket: (
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
function:
function_name:
function_name_identifier: max
function_contents:
bracketed:
start_bracket: (
expression:
column_reference:
naked_identifier: c1
end_bracket: )
- comma: ','
- select_clause_element:
function:
function_name:
function_name_identifier: CAST
function_contents:
bracketed:
start_bracket: (
expression:
function:
function_name:
function_name_identifier: min
function_contents:
bracketed:
start_bracket: (
expression:
column_reference:
naked_identifier: c1
end_bracket: )
keyword: AS
data_type:
primitive_type:
keyword: STRING
end_bracket: )
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
values_clause:
- keyword: VALUES
- bracketed:
start_bracket: (
expression:
numeric_literal: '1'
end_bracket: )
- comma: ','
- bracketed:
start_bracket: (
expression:
numeric_literal: '2'
end_bracket: )
- alias_expression:
keyword: AS
naked_identifier: t
bracketed:
start_bracket: (
identifier_list:
naked_identifier: c1
end_bracket: )
end_bracket: )
- statement_terminator: ;