Add partitioned by and cluster by segments to SparkSQL Create view statement (#7137)

This commit is contained in:
Enrique Perez Delgado
2025-09-20 22:44:00 +02:00
committed by GitHub
parent 655b8f2c3b
commit 9a5d677dbc
5 changed files with 190 additions and 3 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@
.idea
/.sqlfluff
**/.DS_Store
.junie
# Ignore Python cache and prebuilt things
.cache

View File

@@ -6,7 +6,7 @@ flake8
flake8-docstrings
pydocstyle!=6.2.0, !=6.2.1 # See: https://github.com/PyCQA/pydocstyle/issues/618
black>=22.1.0
flake8-black>=0.2.4
flake8-black>=0.3.7
ruff
import-linter
yamllint

View File

@@ -1662,7 +1662,7 @@ class CreateTableStatementSegment(ansi.CreateTableStatementSegment):
class CreateViewStatementSegment(ansi.CreateViewStatementSegment):
"""A `CREATE VIEW` statement.
https://spark.apache.org/docs/3.0.0/sql-ref-syntax-ddl-create-view.html#syntax
https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-view.html#syntax
"""
match_grammar = Sequence(
@@ -1691,6 +1691,11 @@ class CreateViewStatementSegment(ansi.CreateViewStatementSegment):
),
Sequence("USING", Ref("DataSourceFormatSegment"), optional=True),
Ref("OptionsGrammar", optional=True),
OneOf(
Ref("PartitionSpecGrammar"),
Ref("TableClusterByClauseSegment"),
optional=True,
),
Ref("CommentGrammar", optional=True),
Ref("TablePropertiesGrammar", optional=True),
Ref("CreateViewClausesGrammar", optional=True),

View File

@@ -36,3 +36,31 @@ AS SELECT
a,
b
FROM live.dlt_bronze;
-- Fixture case: CREATE OR REFRESH MATERIALIZED VIEW with an explicit,
-- typed and commented column list, followed by a PARTITIONED BY (col1)
-- clause placed before the view-level COMMENT and TBLPROPERTIES.
CREATE OR REFRESH MATERIALIZED VIEW my_dlt_mat_view (
col1 STRING COMMENT 'Dummy column 1',
col2 BIGINT COMMENT 'Dummy column 2',
col3 BOOLEAN COMMENT 'Dummy column 3'
)
PARTITIONED BY (col1)
COMMENT 'Example simplified materialized view with dummy fields.'
TBLPROPERTIES ('quality' = 'gold')
AS SELECT
col1,
col2,
col3
FROM my_source_table;
-- Fixture case: CREATE OR REFRESH MATERIALIZED VIEW with an explicit,
-- typed and commented column list, followed by a CLUSTER BY (col1)
-- clause placed before the view-level COMMENT and TBLPROPERTIES.
CREATE OR REFRESH MATERIALIZED VIEW my_dlt_mat_view (
col1 STRING COMMENT 'Dummy column 1',
col2 BIGINT COMMENT 'Dummy column 2',
col3 BOOLEAN COMMENT 'Dummy column 3'
)
CLUSTER BY (col1)
COMMENT 'Example simplified materialized view with dummy fields.'
TBLPROPERTIES ('quality' = 'gold')
AS SELECT
col1,
col2,
col3
FROM my_source_table;

View File

@@ -3,7 +3,7 @@
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 662d898d3182641df898b2ce6e2764fddffcbeb3003ad42317b6830c756b48ca
_hash: 555cf4e02736d842de63f822a3855bd54d969e2fb17a88186d4b53e039aa3241
file:
- statement:
create_view_statement:
@@ -213,3 +213,156 @@ file:
- dot: .
- naked_identifier: dlt_bronze
- statement_terminator: ;
- statement:
create_view_statement:
- keyword: CREATE
- keyword: OR
- keyword: REFRESH
- keyword: MATERIALIZED
- keyword: VIEW
- table_reference:
naked_identifier: my_dlt_mat_view
- bracketed:
- start_bracket: (
- column_reference:
naked_identifier: col1
- data_type:
primitive_type:
keyword: STRING
- keyword: COMMENT
- quoted_literal: "'Dummy column 1'"
- comma: ','
- column_reference:
naked_identifier: col2
- data_type:
primitive_type:
keyword: BIGINT
- keyword: COMMENT
- quoted_literal: "'Dummy column 2'"
- comma: ','
- column_reference:
naked_identifier: col3
- data_type:
primitive_type:
keyword: BOOLEAN
- keyword: COMMENT
- quoted_literal: "'Dummy column 3'"
- end_bracket: )
- keyword: PARTITIONED
- keyword: BY
- bracketed:
start_bracket: (
column_reference:
naked_identifier: col1
end_bracket: )
- keyword: COMMENT
- quoted_literal: "'Example simplified materialized view with dummy fields.'"
- keyword: TBLPROPERTIES
- bracketed:
start_bracket: (
property_name_identifier:
quoted_identifier: "'quality'"
comparison_operator:
raw_comparison_operator: '='
quoted_literal: "'gold'"
end_bracket: )
- keyword: AS
- select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
naked_identifier: col1
- comma: ','
- select_clause_element:
column_reference:
naked_identifier: col2
- comma: ','
- select_clause_element:
column_reference:
naked_identifier: col3
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
naked_identifier: my_source_table
- statement_terminator: ;
- statement:
create_view_statement:
- keyword: CREATE
- keyword: OR
- keyword: REFRESH
- keyword: MATERIALIZED
- keyword: VIEW
- table_reference:
naked_identifier: my_dlt_mat_view
- bracketed:
- start_bracket: (
- column_reference:
naked_identifier: col1
- data_type:
primitive_type:
keyword: STRING
- keyword: COMMENT
- quoted_literal: "'Dummy column 1'"
- comma: ','
- column_reference:
naked_identifier: col2
- data_type:
primitive_type:
keyword: BIGINT
- keyword: COMMENT
- quoted_literal: "'Dummy column 2'"
- comma: ','
- column_reference:
naked_identifier: col3
- data_type:
primitive_type:
keyword: BOOLEAN
- keyword: COMMENT
- quoted_literal: "'Dummy column 3'"
- end_bracket: )
- table_cluster_by_clause:
- keyword: CLUSTER
- keyword: BY
- bracketed:
start_bracket: (
column_reference:
naked_identifier: col1
end_bracket: )
- keyword: COMMENT
- quoted_literal: "'Example simplified materialized view with dummy fields.'"
- keyword: TBLPROPERTIES
- bracketed:
start_bracket: (
property_name_identifier:
quoted_identifier: "'quality'"
comparison_operator:
raw_comparison_operator: '='
quoted_literal: "'gold'"
end_bracket: )
- keyword: AS
- select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
naked_identifier: col1
- comma: ','
- select_clause_element:
column_reference:
naked_identifier: col2
- comma: ','
- select_clause_element:
column_reference:
naked_identifier: col3
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
naked_identifier: my_source_table
- statement_terminator: ;