Support Spark CREATE TABLE LIKE syntax (#2207)

* update test cases: change file names to better match the Spark docs; add a file for the CREATE TABLE LIKE syntax
* create table like syntax
* update CreateTableStatementSegment: updated to use AnyNumberOf instead of specifying LOCATION multiple times
* refresh yml fixtures

Co-authored-by: Barry Pollard <barry@tunetheweb.com>
2021-12-29 10:07:58 -05:00
parent 078e4e30af
commit 5eee5d94fe
7 changed files with 169 additions and 21 deletions
--- a/src/sqlfluff/dialects/dialect_spark3.py
+++ b/src/sqlfluff/dialects/dialect_spark3.py
@@ -578,9 +578,10 @@ class CreateFunctionStatementSegment(BaseSegment):

@spark3_dialect.segment(replace=True)
 class CreateTableStatementSegment(BaseSegment):
-    """A `CREATE TABLE` statement using a Data Source.
+    """A `CREATE TABLE` statement using a Data Source or Like.

    http://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html
+    https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-like.html
    """

    type = "create_table_statement"
@@ -590,25 +591,36 @@ class CreateTableStatementSegment(BaseSegment):
        "TABLE",
        Ref("IfNotExistsGrammar", optional=True),
        Ref("TableReferenceSegment"),
-        # Columns and comment syntax:
-        Sequence(
-            Bracketed(
-                Delimited(
-                    Sequence(
-                        Ref("ColumnDefinitionSegment"),
-                        Ref("CommentGrammar", optional=True),
+        OneOf(
+            # Columns and comment syntax:
+            Sequence(
+                Bracketed(
+                    Delimited(
+                        Sequence(
+                            Ref("ColumnDefinitionSegment"),
+                            Ref("CommentGrammar", optional=True),
+                        ),
                    ),
                ),
            ),
+            # Like Syntax
+            Sequence(
+                "LIKE",
+                Ref("TableReferenceSegment"),
+            ),
            optional=True,
        ),
        Sequence("USING", Ref("DataSourceFormatGrammar"), optional=True),
+        Ref("RowFormatClauseSegment", optional=True),
+        Ref("StoredAsGrammar", optional=True),
        Sequence("OPTIONS", Ref("BracketedPropertyListGrammar"), optional=True),
        Ref("PartitionSpecGrammar", optional=True),
        Ref("BucketSpecGrammar", optional=True),
-        Ref("LocationGrammar", optional=True),
-        Ref("CommentGrammar", optional=True),
-        Ref("TablePropertiesGrammar", optional=True),
+        AnyNumberOf(
+            Ref("LocationGrammar", optional=True),
+            Ref("CommentGrammar", optional=True),
+            Ref("TablePropertiesGrammar", optional=True),
+        ),
        # Create AS syntax:
        Sequence(
            "AS",
--- a/test/fixtures/dialects/spark3/create_table_datasource.sql
+++ b/test/fixtures/dialects/spark3/create_table_datasource.sql
@@ -10,7 +10,7 @@ INTO 4 BUCKETS
 LOCATION 'root/database/table'
 COMMENT 'this is a comment'
 TBLPROPERTIES ( "key1" = "val1", "key2" = "val2" )
-AS SELECT * FROM student ;
+AS SELECT * FROM student;

 --Use data source
 CREATE TABLE student (id INT, student_name STRING, age INT) USING CSV;
--- a/test/fixtures/dialects/spark3/create_table_datasource.yml
+++ b/test/fixtures/dialects/spark3/create_table_datasource.yml
--- a/test/fixtures/dialects/spark3/create_table_hiveformat.sql
+++ b/test/fixtures/dialects/spark3/create_table_hiveformat.sql
@@ -29,7 +29,7 @@ TBLPROPERTIES ('foo' = 'bar');
 CREATE TABLE student (id INT, student_name STRING, age INT)
 STORED AS ORC
 TBLPROPERTIES ('foo' = 'bar')
-COMMENT 'this is a comment' ;
+COMMENT 'this is a comment';

 --Create partitioned table
 CREATE TABLE student (id INT, student_name STRING)
@@ -59,7 +59,7 @@ MAP KEYS TERMINATED BY ':'
 LINES TERMINATED BY '\n'
 NULL DEFINED AS 'foonull'
 STORED AS TEXTFILE
-LOCATION '/tmp/family/' ;
+LOCATION '/tmp/family/';

 --Use predefined custom SerDe
 CREATE TABLE avroexample
@@ -70,19 +70,24 @@ OUTPUTFORMAT
 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
 TBLPROPERTIES (
    'avro.schema.literal' =
-    '{"namespace": "org.apache.hive", "name": "first_schema", "type": "record", "fields": [ { "name":"string1", "type":"string" }, { "name":"string2", "type":"string" }] }'
+    '{
+        "namespace": "org.apache.hive",
+        "name": "first_schema",
+        "type": "record",
+        "fields": [ { "name":"string1", "type":"string" }, { "name":"string2", "type":"string" }]
+    }'
 );

 --Use personalized custom SerDe
 --(we may need to `ADD JAR xxx.jar` first to ensure we can find the serde_class,
 --or you may run into `CLASSNOTFOUND` exception)
-ADD JAR '/tmp/hive_serde_example.jar' ;
+ADD JAR '/tmp/hive_serde_example.jar';

 CREATE EXTERNAL TABLE family (id INT, family_name STRING)
 ROW FORMAT SERDE 'com.ly.spark.serde.SerDeExample'
 STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat'
 OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat'
-LOCATION '/tmp/family/' ;
+LOCATION '/tmp/family/';

 --Use `CLUSTERED BY` clause to create bucket table without `SORTED BY`
 CREATE TABLE clustered_by_test1 (id INT, age STRING)
--- a/test/fixtures/dialects/spark3/create_table_hiveformat.yml
+++ b/test/fixtures/dialects/spark3/create_table_hiveformat.yml
@@ -3,7 +3,7 @@
 # computed by SQLFluff when running the tests. Please run
 # `python test/generate_parse_fixture_yml.py`  to generate them after adding or
 # altering SQL files.
-_hash: db583620b384aa5e754c6dc48569e2851a0686452db00329236ac0fb0a15c4b6
+_hash: e796b6964105e7d8738fa449e0e94b7206303f19951a57f1cf80d27776f2074f
 file:
 - base:
    create_table_statement:
@@ -453,9 +453,10 @@ file:
      - start_bracket: (
      - literal: "'avro.schema.literal'"
      - comparison_operator: '='
-      - literal: "'{\"namespace\": \"org.apache.hive\", \"name\": \"first_schema\"\
-          , \"type\": \"record\", \"fields\": [ { \"name\":\"string1\", \"type\":\"\
-          string\" }, { \"name\":\"string2\", \"type\":\"string\" }] }'"
+      - literal: "'{\n        \"namespace\": \"org.apache.hive\",\n        \"name\"\
+          : \"first_schema\",\n        \"type\": \"record\",\n        \"fields\":\
+          \ [ { \"name\":\"string1\", \"type\":\"string\" }, { \"name\":\"string2\"\
+          , \"type\":\"string\" }]\n    }'"
      - end_bracket: )
 - statement_terminator: ;
 - base:
--- a/test/fixtures/dialects/spark3/create_table_like.sql
+++ b/test/fixtures/dialects/spark3/create_table_like.sql
@@ -0,0 +1,22 @@
+-- Create Table Like with all optional syntax
+CREATE TABLE IF NOT EXISTS table_identifier LIKE source_table_identifier
+USING PARQUET
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+STORED AS PARQUET
+TBLPROPERTIES ( "key1" = "val1", "key2" = "val2")
+LOCATION "path/to/files";
+
+-- Create table using an existing table
+CREATE TABLE student_dupli LIKE student;
+
+-- Create table like using a data source
+CREATE TABLE student_dupli LIKE student USING CSV;
+
+-- Table is created as external table at the location specified
+CREATE TABLE student_dupli LIKE student LOCATION '/root1/home';
+
+-- Create table like using a rowformat
+CREATE TABLE student_dupli LIKE student
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+STORED AS TEXTFILE
+TBLPROPERTIES ('owner' = 'xxxx');
--- a/test/fixtures/dialects/spark3/create_table_like.yml
+++ b/test/fixtures/dialects/spark3/create_table_like.yml
@@ -0,0 +1,108 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py`  to generate them after adding or
+# altering SQL files.
+_hash: 91824c3c256efd684ca5cb5ce62f926c4fbdc591a29e0cfe3545fd4028a47caa
+file:
+- base:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - keyword: IF
+    - keyword: NOT
+    - keyword: EXISTS
+    - table_reference:
+        identifier: table_identifier
+    - keyword: LIKE
+    - table_reference:
+        identifier: source_table_identifier
+    - keyword: USING
+    - keyword: PARQUET
+    - row_format_clause:
+      - keyword: ROW
+      - keyword: FORMAT
+      - keyword: DELIMITED
+      - keyword: FIELDS
+      - keyword: TERMINATED
+      - keyword: BY
+      - literal: "','"
+    - keyword: STORED
+    - keyword: AS
+    - keyword: PARQUET
+    - keyword: TBLPROPERTIES
+    - bracketed:
+      - start_bracket: (
+      - literal: '"key1"'
+      - comparison_operator: '='
+      - literal: '"val1"'
+      - comma: ','
+      - literal: '"key2"'
+      - comparison_operator: '='
+      - literal: '"val2"'
+      - end_bracket: )
+    - keyword: LOCATION
+    - literal: '"path/to/files"'
+- statement_terminator: ;
+- base:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+        identifier: student_dupli
+    - keyword: LIKE
+    - table_reference:
+        identifier: student
+- statement_terminator: ;
+- base:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+        identifier: student_dupli
+    - keyword: LIKE
+    - table_reference:
+        identifier: student
+    - keyword: USING
+    - keyword: CSV
+- statement_terminator: ;
+- base:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+        identifier: student_dupli
+    - keyword: LIKE
+    - table_reference:
+        identifier: student
+    - keyword: LOCATION
+    - literal: "'/root1/home'"
+- statement_terminator: ;
+- base:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+        identifier: student_dupli
+    - keyword: LIKE
+    - table_reference:
+        identifier: student
+    - row_format_clause:
+      - keyword: ROW
+      - keyword: FORMAT
+      - keyword: DELIMITED
+      - keyword: FIELDS
+      - keyword: TERMINATED
+      - keyword: BY
+      - literal: "','"
+    - keyword: STORED
+    - keyword: AS
+    - file_format: TEXTFILE
+    - keyword: TBLPROPERTIES
+    - bracketed:
+      - start_bracket: (
+      - literal: "'owner'"
+      - comparison_operator: '='
+      - literal: "'xxxx'"
+      - end_bracket: )
+- statement_terminator: ;