From 9a5d677dbca9fd4c39344f0827c3ec9657093744 Mon Sep 17 00:00:00 2001 From: Enrique Perez Delgado Date: Sat, 20 Sep 2025 22:44:00 +0200 Subject: [PATCH] Add `partitioned by` and `cluster by` segments to SparkSQL Create view statement (#7137) --- .gitignore | 1 + requirements_dev.txt | 2 +- src/sqlfluff/dialects/dialect_sparksql.py | 7 +- .../sparksql/databricks_dlt_create_view.sql | 28 ++++ .../sparksql/databricks_dlt_create_view.yml | 155 +++++++++++++++++- 5 files changed, 190 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 6344b99a9..5ff03c56c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .idea /.sqlfluff **/.DS_Store +.junie # Ignore Python cache and prebuilt things .cache diff --git a/requirements_dev.txt b/requirements_dev.txt index 6852cba2e..a4422cbfa 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -6,7 +6,7 @@ flake8 flake8-docstrings pydocstyle!=6.2.0, !=6.2.1 # See: https://github.com/PyCQA/pydocstyle/issues/618 black>=22.1.0 -flake8-black>=0.2.4 +flake8-black>=0.3.7 ruff import-linter yamllint diff --git a/src/sqlfluff/dialects/dialect_sparksql.py b/src/sqlfluff/dialects/dialect_sparksql.py index 293e938fe..abe6f179a 100644 --- a/src/sqlfluff/dialects/dialect_sparksql.py +++ b/src/sqlfluff/dialects/dialect_sparksql.py @@ -1662,7 +1662,7 @@ class CreateTableStatementSegment(ansi.CreateTableStatementSegment): class CreateViewStatementSegment(ansi.CreateViewStatementSegment): """A `CREATE VIEW` statement. - https://spark.apache.org/docs/3.0.0/sql-ref-syntax-ddl-create-view.html#syntax + https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-view.html#syntax """ match_grammar = Sequence( @@ -1691,6 +1691,11 @@ class CreateViewStatementSegment(ansi.CreateViewStatementSegment): ), Sequence("USING", Ref("DataSourceFormatSegment"), optional=True), Ref("OptionsGrammar", optional=True), + OneOf( + Ref("PartitionSpecGrammar"), + Ref("TableClusterByClauseSegment"), + optional=True, + ), Ref("CommentGrammar", optional=True), Ref("TablePropertiesGrammar", optional=True), Ref("CreateViewClausesGrammar", optional=True), diff --git a/test/fixtures/dialects/sparksql/databricks_dlt_create_view.sql b/test/fixtures/dialects/sparksql/databricks_dlt_create_view.sql index fb2fa4f93..bacb32961 100644 --- a/test/fixtures/dialects/sparksql/databricks_dlt_create_view.sql +++ b/test/fixtures/dialects/sparksql/databricks_dlt_create_view.sql @@ -36,3 +36,31 @@ AS SELECT a, b FROM live.dlt_bronze; + +CREATE OR REFRESH MATERIALIZED VIEW my_dlt_mat_view ( + col1 STRING COMMENT 'Dummy column 1', + col2 BIGINT COMMENT 'Dummy column 2', + col3 BOOLEAN COMMENT 'Dummy column 3' +) +PARTITIONED BY (col1) +COMMENT 'Example simplified materialized view with dummy fields.' +TBLPROPERTIES ('quality' = 'gold') +AS SELECT + col1, + col2, + col3 +FROM my_source_table; + +CREATE OR REFRESH MATERIALIZED VIEW my_dlt_mat_view ( + col1 STRING COMMENT 'Dummy column 1', + col2 BIGINT COMMENT 'Dummy column 2', + col3 BOOLEAN COMMENT 'Dummy column 3' +) +CLUSTER BY (col1) +COMMENT 'Example simplified materialized view with dummy fields.' +TBLPROPERTIES ('quality' = 'gold') +AS SELECT + col1, + col2, + col3 +FROM my_source_table; diff --git a/test/fixtures/dialects/sparksql/databricks_dlt_create_view.yml b/test/fixtures/dialects/sparksql/databricks_dlt_create_view.yml index c657e8289..b73dc1446 100644 --- a/test/fixtures/dialects/sparksql/databricks_dlt_create_view.yml +++ b/test/fixtures/dialects/sparksql/databricks_dlt_create_view.yml @@ -3,7 +3,7 @@ # computed by SQLFluff when running the tests. Please run # `python test/generate_parse_fixture_yml.py` to generate them after adding or # altering SQL files. -_hash: 662d898d3182641df898b2ce6e2764fddffcbeb3003ad42317b6830c756b48ca +_hash: 555cf4e02736d842de63f822a3855bd54d969e2fb17a88186d4b53e039aa3241 file: - statement: create_view_statement: @@ -213,3 +213,156 @@ file: - dot: . - naked_identifier: dlt_bronze - statement_terminator: ; +- statement: + create_view_statement: + - keyword: CREATE + - keyword: OR + - keyword: REFRESH + - keyword: MATERIALIZED + - keyword: VIEW + - table_reference: + naked_identifier: my_dlt_mat_view + - bracketed: + - start_bracket: ( + - column_reference: + naked_identifier: col1 + - data_type: + primitive_type: + keyword: STRING + - keyword: COMMENT + - quoted_literal: "'Dummy column 1'" + - comma: ',' + - column_reference: + naked_identifier: col2 + - data_type: + primitive_type: + keyword: BIGINT + - keyword: COMMENT + - quoted_literal: "'Dummy column 2'" + - comma: ',' + - column_reference: + naked_identifier: col3 + - data_type: + primitive_type: + keyword: BOOLEAN + - keyword: COMMENT + - quoted_literal: "'Dummy column 3'" + - end_bracket: ) + - keyword: PARTITIONED + - keyword: BY + - bracketed: + start_bracket: ( + column_reference: + naked_identifier: col1 + end_bracket: ) + - keyword: COMMENT + - quoted_literal: "'Example simplified materialized view with dummy fields.'" + - keyword: TBLPROPERTIES + - bracketed: + start_bracket: ( + property_name_identifier: + quoted_identifier: "'quality'" + comparison_operator: + raw_comparison_operator: '=' + quoted_literal: "'gold'" + end_bracket: ) + - keyword: AS + - select_statement: + select_clause: + - keyword: SELECT + - select_clause_element: + column_reference: + naked_identifier: col1 + - comma: ',' + - select_clause_element: + column_reference: + naked_identifier: col2 + - comma: ',' + - select_clause_element: + column_reference: + naked_identifier: col3 + from_clause: + keyword: FROM + from_expression: + from_expression_element: + table_expression: + table_reference: + naked_identifier: my_source_table +- statement_terminator: ; +- statement: + create_view_statement: + - keyword: CREATE + - keyword: OR + - keyword: REFRESH + - keyword: MATERIALIZED + - keyword: VIEW + - table_reference: + naked_identifier: my_dlt_mat_view + - bracketed: + - start_bracket: ( + - column_reference: + naked_identifier: col1 + - data_type: + primitive_type: + keyword: STRING + - keyword: COMMENT + - quoted_literal: "'Dummy column 1'" + - comma: ',' + - column_reference: + naked_identifier: col2 + - data_type: + primitive_type: + keyword: BIGINT + - keyword: COMMENT + - quoted_literal: "'Dummy column 2'" + - comma: ',' + - column_reference: + naked_identifier: col3 + - data_type: + primitive_type: + keyword: BOOLEAN + - keyword: COMMENT + - quoted_literal: "'Dummy column 3'" + - end_bracket: ) + - table_cluster_by_clause: + - keyword: CLUSTER + - keyword: BY + - bracketed: + start_bracket: ( + column_reference: + naked_identifier: col1 + end_bracket: ) + - keyword: COMMENT + - quoted_literal: "'Example simplified materialized view with dummy fields.'" + - keyword: TBLPROPERTIES + - bracketed: + start_bracket: ( + property_name_identifier: + quoted_identifier: "'quality'" + comparison_operator: + raw_comparison_operator: '=' + quoted_literal: "'gold'" + end_bracket: ) + - keyword: AS + - select_statement: + select_clause: + - keyword: SELECT + - select_clause_element: + column_reference: + naked_identifier: col1 + - comma: ',' + - select_clause_element: + column_reference: + naked_identifier: col2 + - comma: ',' + - select_clause_element: + column_reference: + naked_identifier: col3 + from_clause: + keyword: FROM + from_expression: + from_expression_element: + table_expression: + table_reference: + naked_identifier: my_source_table +- statement_terminator: ;