Support Spark CREATE TABLE LIKE syntax (#2207)

* update test cases

change file names to better match Spark docs

add file for create table like syntax

* create table like syntax

* update CreateTableStatementSegment

updated to use AnyNumberOf instead of specifying LOCATION multiple times

* refresh yml fixtures

Co-authored-by: Barry Pollard <barry@tunetheweb.com>
This commit is contained in:
Lorin Dawson
2021-12-29 10:07:58 -05:00
committed by GitHub
parent 078e4e30af
commit 5eee5d94fe
7 changed files with 169 additions and 21 deletions

View File

@@ -578,9 +578,10 @@ class CreateFunctionStatementSegment(BaseSegment):
@spark3_dialect.segment(replace=True)
class CreateTableStatementSegment(BaseSegment):
"""A `CREATE TABLE` statement using a Data Source.
"""A `CREATE TABLE` statement using a Data Source or Like.
http://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-datasource.html
https://spark.apache.org/docs/latest/sql-ref-syntax-ddl-create-table-like.html
"""
type = "create_table_statement"
@@ -590,25 +591,36 @@ class CreateTableStatementSegment(BaseSegment):
"TABLE",
Ref("IfNotExistsGrammar", optional=True),
Ref("TableReferenceSegment"),
# Columns and comment syntax:
Sequence(
Bracketed(
Delimited(
Sequence(
Ref("ColumnDefinitionSegment"),
Ref("CommentGrammar", optional=True),
OneOf(
# Columns and comment syntax:
Sequence(
Bracketed(
Delimited(
Sequence(
Ref("ColumnDefinitionSegment"),
Ref("CommentGrammar", optional=True),
),
),
),
),
# Like Syntax
Sequence(
"LIKE",
Ref("TableReferenceSegment"),
),
optional=True,
),
Sequence("USING", Ref("DataSourceFormatGrammar"), optional=True),
Ref("RowFormatClauseSegment", optional=True),
Ref("StoredAsGrammar", optional=True),
Sequence("OPTIONS", Ref("BracketedPropertyListGrammar"), optional=True),
Ref("PartitionSpecGrammar", optional=True),
Ref("BucketSpecGrammar", optional=True),
Ref("LocationGrammar", optional=True),
Ref("CommentGrammar", optional=True),
Ref("TablePropertiesGrammar", optional=True),
AnyNumberOf(
Ref("LocationGrammar", optional=True),
Ref("CommentGrammar", optional=True),
Ref("TablePropertiesGrammar", optional=True),
),
# Create AS syntax:
Sequence(
"AS",

View File

@@ -10,7 +10,7 @@ INTO 4 BUCKETS
LOCATION 'root/database/table'
COMMENT 'this is a comment'
TBLPROPERTIES ( "key1" = "val1", "key2" = "val2" )
AS SELECT * FROM student ;
AS SELECT * FROM student;
--Use data source
CREATE TABLE student (id INT, student_name STRING, age INT) USING CSV;

View File

@@ -29,7 +29,7 @@ TBLPROPERTIES ('foo' = 'bar');
CREATE TABLE student (id INT, student_name STRING, age INT)
STORED AS ORC
TBLPROPERTIES ('foo' = 'bar')
COMMENT 'this is a comment' ;
COMMENT 'this is a comment';
--Create partitioned table
CREATE TABLE student (id INT, student_name STRING)
@@ -59,7 +59,7 @@ MAP KEYS TERMINATED BY ':'
LINES TERMINATED BY '\n'
NULL DEFINED AS 'foonull'
STORED AS TEXTFILE
LOCATION '/tmp/family/' ;
LOCATION '/tmp/family/';
--Use predefined custom SerDe
CREATE TABLE avroexample
@@ -70,19 +70,24 @@ OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.literal' =
'{"namespace": "org.apache.hive", "name": "first_schema", "type": "record", "fields": [ { "name":"string1", "type":"string" }, { "name":"string2", "type":"string" }] }'
'{
"namespace": "org.apache.hive",
"name": "first_schema",
"type": "record",
"fields": [ { "name":"string1", "type":"string" }, { "name":"string2", "type":"string" }]
}'
);
--Use personalized custom SerDe
--(we may need to `ADD JAR xxx.jar` first to ensure we can find the serde_class,
--or you may run into `CLASSNOTFOUND` exception)
ADD JAR '/tmp/hive_serde_example.jar' ;
ADD JAR '/tmp/hive_serde_example.jar';
CREATE EXTERNAL TABLE family (id INT, family_name STRING)
ROW FORMAT SERDE 'com.ly.spark.serde.SerDeExample'
STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat'
OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat'
LOCATION '/tmp/family/' ;
LOCATION '/tmp/family/';
--Use `CLUSTERED BY` clause to create bucket table without `SORTED BY`
CREATE TABLE clustered_by_test1 (id INT, age STRING)

View File

@@ -3,7 +3,7 @@
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: db583620b384aa5e754c6dc48569e2851a0686452db00329236ac0fb0a15c4b6
_hash: e796b6964105e7d8738fa449e0e94b7206303f19951a57f1cf80d27776f2074f
file:
- base:
create_table_statement:
@@ -453,9 +453,10 @@ file:
- start_bracket: (
- literal: "'avro.schema.literal'"
- comparison_operator: '='
- literal: "'{\"namespace\": \"org.apache.hive\", \"name\": \"first_schema\"\
, \"type\": \"record\", \"fields\": [ { \"name\":\"string1\", \"type\":\"\
string\" }, { \"name\":\"string2\", \"type\":\"string\" }] }'"
- literal: "'{\n \"namespace\": \"org.apache.hive\",\n \"name\"\
: \"first_schema\",\n \"type\": \"record\",\n \"fields\":\
\ [ { \"name\":\"string1\", \"type\":\"string\" }, { \"name\":\"string2\"\
, \"type\":\"string\" }]\n }'"
- end_bracket: )
- statement_terminator: ;
- base:

View File

@@ -0,0 +1,22 @@
-- Create Table Like with all optional syntax
CREATE TABLE IF NOT EXISTS table_identifier LIKE source_table_identifier
USING PARQUET
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS PARQUET
TBLPROPERTIES ( "key1" = "val1", "key2" = "val2")
LOCATION "path/to/files";
-- Create table using an existing table
CREATE TABLE student_dupli LIKE student;
-- Create table like using a data source
CREATE TABLE student_dupli LIKE student USING CSV;
-- Table is created as external table at the location specified
CREATE TABLE student_dupli LIKE student LOCATION '/root1/home';
-- Create table like using a rowformat
CREATE TABLE student_dupli LIKE student
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
TBLPROPERTIES ('owner' = 'xxxx');

View File

@@ -0,0 +1,108 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 91824c3c256efd684ca5cb5ce62f926c4fbdc591a29e0cfe3545fd4028a47caa
file:
- base:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- keyword: IF
- keyword: NOT
- keyword: EXISTS
- table_reference:
identifier: table_identifier
- keyword: LIKE
- table_reference:
identifier: source_table_identifier
- keyword: USING
- keyword: PARQUET
- row_format_clause:
- keyword: ROW
- keyword: FORMAT
- keyword: DELIMITED
- keyword: FIELDS
- keyword: TERMINATED
- keyword: BY
- literal: "','"
- keyword: STORED
- keyword: AS
- keyword: PARQUET
- keyword: TBLPROPERTIES
- bracketed:
- start_bracket: (
- literal: '"key1"'
- comparison_operator: '='
- literal: '"val1"'
- comma: ','
- literal: '"key2"'
- comparison_operator: '='
- literal: '"val2"'
- end_bracket: )
- keyword: LOCATION
- literal: '"path/to/files"'
- statement_terminator: ;
- base:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
identifier: student_dupli
- keyword: LIKE
- table_reference:
identifier: student
- statement_terminator: ;
- base:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
identifier: student_dupli
- keyword: LIKE
- table_reference:
identifier: student
- keyword: USING
- keyword: CSV
- statement_terminator: ;
- base:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
identifier: student_dupli
- keyword: LIKE
- table_reference:
identifier: student
- keyword: LOCATION
- literal: "'/root1/home'"
- statement_terminator: ;
- base:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
identifier: student_dupli
- keyword: LIKE
- table_reference:
identifier: student
- row_format_clause:
- keyword: ROW
- keyword: FORMAT
- keyword: DELIMITED
- keyword: FIELDS
- keyword: TERMINATED
- keyword: BY
- literal: "','"
- keyword: STORED
- keyword: AS
- file_format: TEXTFILE
- keyword: TBLPROPERTIES
- bracketed:
- start_bracket: (
- literal: "'owner'"
- comparison_operator: '='
- literal: "'xxxx'"
- end_bracket: )
- statement_terminator: ;