Dialect: Impala (#6445)

2024-11-11 10:30:50 +00:00
parent 02bca484a6
commit 0e333c3246
15 changed files with 1374 additions and 0 deletions
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -28,6 +28,9 @@ greenplum:
 hive:
  - "/(hive)/i"

+impala:
+  - "/(impala)/i"
+
 mariadb:
  - "/(mariadb)/i"

--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@ currently supports the following SQL dialects (though perhaps not in full):
 - [Exasol](https://www.exasol.com/)
 - [Greenplum](https://greenplum.org/)
 - [Hive](https://hive.apache.org/)
+- [Impala](https://impala.apache.org/)
 - [Materialize](https://materialize.com/)
 - [MySQL](https://www.mysql.com/)
 - [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ keywords = [
    "exasol",
    "greenplum",
    "hive",
+    "impala",
    "materialize",
    "mariadb",
    "mysql",
--- a/src/sqlfluff/core/dialects/__init__.py
+++ b/src/sqlfluff/core/dialects/__init__.py
@@ -30,6 +30,7 @@ _dialect_lookup = {
    "exasol": ("dialect_exasol", "exasol_dialect"),
    "greenplum": ("dialect_greenplum", "greenplum_dialect"),
    "hive": ("dialect_hive", "hive_dialect"),
+    "impala": ("dialect_impala", "impala_dialect"),
    "materialize": ("dialect_materialize", "materialize_dialect"),
    "mariadb": ("dialect_mariadb", "mariadb_dialect"),
    "mysql": ("dialect_mysql", "mysql_dialect"),
--- a/src/sqlfluff/dialects/dialect_impala.py
+++ b/src/sqlfluff/dialects/dialect_impala.py
@@ -0,0 +1,194 @@
+"""The Impala dialect."""
+
+from sqlfluff.core.dialects import load_raw_dialect
+from sqlfluff.core.parser import (
+    BaseSegment,
+    BinaryOperatorSegment,
+    Bracketed,
+    Delimited,
+    OneOf,
+    Ref,
+    Sequence,
+    StringParser,
+)
+from sqlfluff.dialects import dialect_hive as hive
+from sqlfluff.dialects.dialect_impala_keywords import (
+    RESERVED_KEYWORDS,
+    UNRESERVED_KEYWORDS,
+)
+
+hive_dialect = load_raw_dialect("hive")
+impala_dialect = hive_dialect.copy_as(
+    "impala",
+    formatted_name="Apache Impala",
+    docstring="The dialect for Apache `Impala <https://impala.apache.org/>`_.",
+)
+# Add the Impala keyword lists to the dialect's keyword sets.
+impala_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)
+impala_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS)
+# Division may be written either as the `DIV` keyword or as `/`.
+impala_dialect.replace(
+    DivideSegment=OneOf(
+        StringParser("DIV", BinaryOperatorSegment),
+        StringParser("/", BinaryOperatorSegment),
+    )
+)
+
+
+class StatementSegment(hive.StatementSegment):
+    """A generic statement: Hive's statement grammar plus Impala additions."""
+
+    type = "statement"
+
+    match_grammar = hive.StatementSegment.match_grammar.copy(
+        insert=[
+            Ref("ComputeStatsStatementSegment"),
+            Ref("InsertStatementSegment"),
+        ]
+    )
+
+
+class ComputeStatsStatementSegment(BaseSegment):
+    """A `COMPUTE STATS` or `COMPUTE INCREMENTAL STATS` statement.
+
+    Full Apache Impala `COMPUTE STATS` reference here:
+    https://impala.apache.org/docs/build/html/topics/impala_compute_stats.html
+    """
+
+    type = "compute_stats_statement"
+
+    match_grammar = Sequence(
+        "COMPUTE",
+        OneOf(
+            Sequence("STATS", Ref("TableReferenceSegment")),  # COMPUTE STATS <table>
+            Sequence(  # COMPUTE INCREMENTAL STATS <table> [<partition spec>]
+                "INCREMENTAL",
+                "STATS",
+                Ref("TableReferenceSegment"),
+                Ref("PartitionSpecGrammar", optional=True),
+            ),
+        ),
+    )
+
+
+class CreateTableStatementSegment(hive.CreateTableStatementSegment):
+    """A `CREATE TABLE` statement.
+
+    Full Apache Impala `CREATE TABLE` reference here:
+    https://impala.apache.org/docs/build/html/topics/impala_create_table.html
+    """
+
+    type = "create_table_statement"
+
+    match_grammar = Sequence(
+        "CREATE",
+        Ref.keyword("EXTERNAL", optional=True),
+        "TABLE",
+        Ref("IfNotExistsGrammar", optional=True),
+        Ref("TableReferenceSegment"),
+        Sequence(
+            Bracketed(  # Optional list of column definitions / table constraints.
+                Delimited(
+                    OneOf(
+                        Ref("TableConstraintSegment", optional=True),
+                        Sequence(
+                            Ref("ColumnDefinitionSegment"),
+                            Ref("CommentGrammar", optional=True),
+                        ),
+                    ),
+                    bracket_pairs_set="angle_bracket_pairs",
+                ),
+                optional=True,
+            ),
+            Sequence(  # Optional PARTITIONED BY (...) clause.
+                "PARTITIONED",
+                "BY",
+                Bracketed(
+                    Delimited(
+                        Sequence(
+                            OneOf(  # Either a full column definition or a bare name.
+                                Ref("ColumnDefinitionSegment"),
+                                Ref("SingleIdentifierGrammar"),
+                            ),
+                            Ref("CommentGrammar", optional=True),
+                        ),
+                    ),
+                ),
+                optional=True,
+            ),
+            Sequence(  # Optional SORT BY (...) clause.
+                "SORT",
+                "BY",
+                Bracketed(Delimited(Sequence(Ref("ColumnReferenceSegment")))),
+                optional=True,
+            ),
+            Ref("CommentGrammar", optional=True),
+            Ref("RowFormatClauseSegment", optional=True),
+            Ref("SerdePropertiesGrammar", optional=True),
+            Ref("StoredAsGrammar", optional=True),
+            Ref("LocationGrammar", optional=True),
+            Sequence(  # Optional caching clause: CACHED IN ... or UNCACHED.
+                OneOf(
+                    Sequence(
+                        "CACHED",
+                        "IN",
+                        Delimited(Ref("PoolNameReferenceSegment")),
+                        Sequence(  # Optional WITH REPLICATION = <number>.
+                            "WITH",
+                            "REPLICATION",
+                            "=",
+                            Ref("NumericLiteralSegment"),
+                            optional=True,
+                        ),
+                    ),
+                    Ref.keyword("UNCACHED"),
+                ),
+                optional=True,
+            ),
+            Ref("TablePropertiesGrammar", optional=True),
+        ),
+    )
+
+
+class InsertStatementSegment(BaseSegment):
+    """An `INSERT INTO` or `INSERT OVERWRITE` statement.
+
+    Full Apache Impala `INSERT` reference here:
+    https://impala.apache.org/docs/build/html/topics/impala_insert.html
+    """
+
+    type = "insert_statement"
+
+    match_grammar = Sequence(
+        "INSERT",
+        OneOf(
+            Sequence(  # INSERT OVERWRITE [TABLE] <table> ... <select>
+                "OVERWRITE",
+                Ref.keyword("TABLE", optional=True),
+                Ref("TableReferenceSegment"),
+                Ref("PartitionSpecGrammar", optional=True),
+                Bracketed(  # Optional [SHUFFLE] / [NOSHUFFLE] in square brackets.
+                    OneOf("SHUFFLE", "NOSHUFFLE"), bracket_type="square", optional=True
+                ),
+                Ref("IfNotExistsGrammar", optional=True),
+                Ref("SelectableGrammar"),
+            ),
+            Sequence(  # INSERT INTO [TABLE] <table> [(<columns>)] ... <select | values>
+                "INTO",
+                Ref.keyword("TABLE", optional=True),
+                Ref("TableReferenceSegment"),
+                Sequence(  # Optional bracketed list of target columns.
+                    Bracketed(Delimited(Sequence(Ref("ColumnReferenceSegment")))),
+                    optional=True,
+                ),
+                Ref("PartitionSpecGrammar", optional=True),
+                Bracketed(  # Optional [SHUFFLE] / [NOSHUFFLE] in square brackets.
+                    OneOf("SHUFFLE", "NOSHUFFLE"), bracket_type="square", optional=True
+                ),
+                OneOf(  # Unlike OVERWRITE, INTO also accepts a VALUES clause.
+                    Ref("SelectableGrammar"),
+                    Ref("ValuesClauseSegment"),
+                ),
+            ),
+        ),
+    )
--- a/src/sqlfluff/dialects/dialect_impala_keywords.py
+++ b/src/sqlfluff/dialects/dialect_impala_keywords.py
@@ -0,0 +1,565 @@
+"""A list of Impala keywords.
+
+https://impala.apache.org/docs/build/html/topics/impala_reserved_words.html
+
+Impala docs recommend respecting Hive keywords, so both lists include Hive's as well.
+"""
+
+RESERVED_KEYWORDS = [
+    "ADD",
+    "AGGREGATE",
+    "ALL",
+    "ALLOCATE",
+    "ALTER",
+    "ANALYTIC",
+    "AND",
+    "ANTI",
+    "ANY",
+    "API_VERSION",
+    "ARE",
+    "ARRAY",
+    "ARRAY_AGG",
+    "ARRAY_MAX_CARDINALITY",
+    "AS",
+    "ASC",
+    "ASENSITIVE",
+    "ASYMMETRIC",
+    "AT",
+    "ATOMIC",
+    "AUTHORIZATION",
+    "AVRO",
+    "BEGIN_FRAME",
+    "BEGIN_PARTITION",
+    "BETWEEN",
+    "BIGINT",
+    "BINARY",
+    "BLOB",
+    "BLOCK_SIZE",
+    "BOOLEAN",
+    "BOTH",
+    "BUCKETS",
+    "BY",
+    "CACHED",
+    "CALLED",
+    "CARDINALITY",
+    "CASCADE",
+    "CASCADED",
+    "CASE",
+    "CAST",
+    "CHANGE",
+    "CHAR",
+    "CHARACTER",
+    "CLASS",
+    "CLOB",
+    "CLOSE_FN",
+    "COLLATE",
+    "COLLECT",
+    "COLUMN",
+    "COLUMNS",
+    "COMMENT",
+    "COMMIT",
+    "COMPRESSION",
+    "COMPUTE",
+    "CONDITION",
+    "CONNECT",
+    "CONSTRAINT",
+    "CONTAINS",
+    "CONVERT",
+    "COPY",
+    "CORR",
+    "CORRESPONDING",
+    "COVAR_POP",
+    "COVAR_SAMP",
+    "CREATE",
+    "CROSS",
+    "CUBE",
+    "CURRENT",
+    "CURRENT_DATE",
+    "CURRENT_DEFAULT_TRANSFORM_GROUP",
+    "CURRENT_PATH",
+    "CURRENT_ROLE",
+    "CURRENT_ROW",
+    "CURRENT_SCHEMA",
+    "CURRENT_TIME",
+    "CURRENT_TRANSFORM_GROUP_FOR_TYPE",
+    "CURSOR",
+    "CYCLE",
+    "DATA",
+    "DATABASE",
+    "DATABASES",
+    "DATE",
+    "DATETIME",
+    "DEALLOCATE",
+    "DEC",
+    "DECFLOAT",
+    "DECIMAL",
+    "DECLARE",
+    "DEFAULT",
+    "DEFINE",
+    "DELETE",
+    "DELIMITED",
+    "DEREF",
+    "DESC",
+    "DESCRIBE",
+    "DETERMINISTIC",
+    "DISABLE",
+    "DISCONNECT",
+    "DISTINCT",
+    "DIV",
+    "DOUBLE",
+    "DROP",
+    "DYNAMIC",
+    "EACH",
+    "ELEMENT",
+    "ELSE",
+    "EMPTY",
+    "ENABLE",
+    "ENCODING",
+    "END",
+    "END-EXEC",
+    "END_FRAME",
+    "END_PARTITION",
+    "EQUALS",
+    "ESCAPE",
+    "ESCAPED",
+    "EVERY",
+    "EXCEPT",
+    "EXEC",
+    "EXECUTE",
+    "EXISTS",
+    "EXPLAIN",
+    "EXTENDED",
+    "EXTERNAL",
+    "FALSE",
+    "FETCH",
+    "FIELDS",
+    "FILEFORMAT",
+    "FILES",
+    "FILTER",
+    "FINALIZE_FN",
+    "FIRST",
+    "FLOAT",
+    "FOLLOWING",
+    "FOR",
+    "FOREIGN",
+    "FORMAT",
+    "FORMATTED",
+    "FRAME_ROW",
+    "FREE",
+    "FROM",
+    "FULL",
+    "FUNCTION",
+    "FUNCTIONS",
+    "FUSION",
+    "GET",
+    "GLOBAL",
+    "GRANT",
+    "GROUP",
+    "GROUPING",
+    "GROUPS",
+    "HASH",
+    "HAVING",
+    "HOLD",
+    "HUDIPARQUET",
+    "ICEBERG",
+    "IF",
+    "IGNORE",
+    "ILIKE",
+    "IN",
+    "INCREMENTAL",
+    "INDICATOR",
+    "INIT_FN",
+    "INITIAL",
+    "INNER",
+    "INOUT",
+    "INPATH",
+    "INSENSITIVE",
+    "INSERT",
+    "INT",
+    "INTEGER",
+    "INTERMEDIATE",
+    "INTERSECT",
+    "INTERSECTION",
+    "INTERVAL",
+    "INTO",
+    "INVALIDATE",
+    "IREGEXP",
+    "IS",
+    "JOIN",
+    "JSONFILE",
+    "JSON_ARRAY",
+    "JSON_ARRAYAGG",
+    "JSON_EXISTS",
+    "JSON_OBJECT",
+    "JSON_OBJECTAGG",
+    "JSON_QUERY",
+    "JSON_TABLE",
+    "JSON_TABLE_PRIMITIVE",
+    "JSON_VALUE",
+    "KUDU",
+    "LARGE",
+    "LAST",
+    "LATERAL",
+    "LEADING",
+    "LEFT",
+    "LEXICAL",
+    "LIKE",
+    "LIKE_REGEX",
+    "LIMIT",
+    "LINES",
+    "LISTAGG",
+    "LOAD",
+    "LOCAL",
+    "LOCALTIMESTAMP",
+    "LOCATION",
+    "LOG10",
+    "MAP",
+    "MANAGEDLOCATION",
+    "MATCH",
+    "MATCH_NUMBER",
+    "MATCH_RECOGNIZE",
+    "MATCHES",
+    "MERGE",
+    "MERGE_FN",
+    "METADATA",
+    "METHOD",
+    "MINUS",
+    "MODIFIES",
+    "MULTISET",
+    "NATIONAL",
+    "NATURAL",
+    "NCHAR",
+    "NCLOB",
+    "NO",
+    "NON",
+    "NONE",
+    "NORELY",
+    "NORMALIZE",
+    "NOT",
+    "NOVALIDATE",
+    "NTH_VALUE",
+    "NULL",
+    "NULLS",
+    "NUMERIC",
+    "OCCURRENCES_REGEX",
+    "OCTET_LENGTH",
+    "OF",
+    "OFFSET",
+    "OMIT",
+    "ON",
+    "ONE",
+    "ONLY",
+    "OPTIMIZE",
+    "OR",
+    "ORC",
+    "ORDER",
+    "OUT",
+    "OUTER",
+    "OVER",
+    "OVERLAPS",
+    "OVERLAY",
+    "OVERWRITE",
+    "PARQUET",
+    "PARQUETFILE",
+    "PARTITION",
+    "PARTITIONED",
+    "PARTITIONS",
+    "PATTERN",
+    "PER",
+    "PERCENT",
+    "PERCENTILE_CONT",
+    "PERCENTILE_DISC",
+    "PORTION",
+    "POSITION",
+    "POSITION_REGEX",
+    "PRECEDES",
+    "PRECEDING",
+    "PREPARE",
+    "PREPARE_FN",
+    "PRIMARY",
+    "PROCEDURE",
+    "PRODUCED",
+    "PTF",
+    "PURGE",
+    "RANGE",
+    "RCFILE",
+    "READS",
+    "REAL",
+    "RECOVER",
+    "RECURSIVE",
+    "REF",
+    "REFERENCES",
+    "REFERENCING",
+    "REFRESH",
+    "REGEXP",
+    "REGR_AVGX",
+    "REGR_AVGY",
+    "REGR_COUNT",
+    "REGR_INTERCEPT",
+    "REGR_R2",
+    "REGR_SLOPE",
+    "REGR_SXX",
+    "REGR_SXY",
+    "REGR_SYY",
+    "RELEASE",
+    "RELY",
+    "RENAME",
+    "REPEATABLE",
+    "REPLACE",
+    "REPLICATION",
+    "RESTRICT",
+    "RETURNS",
+    "REVOKE",
+    "RIGHT",
+    "RLIKE",
+    "ROLE",
+    "ROLES",
+    "ROLLBACK",
+    "ROLLUP",
+    "ROW",
+    "ROWS",
+    "RUNNING",
+    "RWSTORAGE",
+    "SAVEPOINT",
+    "SCHEMA",
+    "SCHEMAS",
+    "SCOPE",
+    "SCROLL",
+    "SEARCH",
+    "SEEK",
+    "SELECT",
+    "SELECTIVITY",
+    "SEMI",
+    "SENSITIVE",
+    "SEQUENCEFILE",
+    "SERDEPROPERTIES",
+    "SERIALIZE_FN",
+    "SET",
+    "SETS",
+    "SHOW",
+    "SIMILAR",
+    "SKIP",
+    "SMALLINT",
+    "SOME",
+    "SORT",
+    "SPEC",
+    "SPECIFIC",
+    "SPECIFICTYPE",
+    "SQLEXCEPTION",
+    "SQLSTATE",
+    "SQLWARNING",
+    "STATIC",
+    "STATS",
+    "STORAGEHANDLER_URI",
+    "STORED",
+    "STRAIGHT_JOIN",
+    "STRING",
+    "STRUCT",
+    "SUBMULTISET",
+    "SUBSET",
+    "SUBSTRING_REGEX",
+    "SUCCEEDS",
+    "SYMBOL",
+    "SYMMETRIC",
+    "SYSTEM_TIME",
+    "SYSTEM_USER",
+    "SYSTEM_VERSION",
+    "TABLE",
+    "TABLES",
+    "TABLESAMPLE",
+    "TBLPROPERTIES",
+    "TERMINATED",
+    "TEXTFILE",
+    "THEN",
+    "TIMESTAMP",
+    "TIMEZONE_HOUR",
+    "TIMEZONE_MINUTE",
+    "TINYINT",
+    "TO",
+    "TRAILING",
+    "TRANSLATE_REGEX",
+    "TRANSLATION",
+    "TREAT",
+    "TRIGGER",
+    "TRIM_ARRAY",
+    "TRUE",
+    "TRUNCATE",
+    "UESCAPE",
+    "UNBOUNDED",
+    "UNCACHED",
+    "UNION",
+    "UNIQUE",
+    "UNKNOWN",
+    "UNNEST",
+    "UNSET",
+    "UPDATE",
+    "UPDATE_FN",
+    "UPSERT",
+    "USE",
+    "USER_DEFINED_FN",
+    "USING",
+    "VALIDATE",
+    "VALUE_OF",
+    "VALUES",
+    "VARBINARY",
+    "VARCHAR",
+    "VARYING",
+    "VERSIONING",
+    "VIEW",
+    "WHEN",
+    "WHENEVER",
+    "WHERE",
+    "WIDTH_BUCKET",
+    "WINDOW",
+    "WITH",
+    "WITHIN",
+    "WITHOUT",
+    "ZORDER",
+    "CONF",
+    "CURRENT_TIMESTAMP",
+    "EXCHANGE",
+    "IMPORT",
+    "LESS",
+    "MACRO",
+    "MORE",
+    "PARTIALSCAN",
+    "PRESERVE",
+    "REDUCE",
+    "TRANSFORM",
+    "UNIQUEJOIN",
+    "USER",
+    "UTC_TMESTAMP",
+    "START",
+    "CACHE",
+    "DAYOFWEEK",
+    "EXTRACT",
+    "FLOOR",
+    "PRECISION",
+    "VIEWS",
+    "TIME",
+    "SYNC",
+]
+
+UNRESERVED_KEYWORDS = [
+    # Impala-specific
+    "SHUFFLE",
+    "NOSHUFFLE",
+    # Hive unreserved keywords
+    "ADMIN",
+    "AFTER",
+    "ANALYZE",
+    "ARCHIVE",
+    "BEFORE",
+    "BERNOULLI",
+    "BUCKET",
+    "CLUSTER",
+    "CLUSTERED",
+    "CLUSTERSTATUS",
+    "COLLECTION",
+    "COMPACT",
+    "COMPACTIONS",
+    "CONCATENATE",
+    "CONTINUE",
+    "DAY",
+    "DBPROPERTIES",
+    "DEFERRED",
+    "DEFINED",
+    "DEPENDENCY",
+    "DIRECTORIES",
+    "DIRECTORY",
+    "DISTRIBUTE",
+    "ELEM_TYPE",
+    "EXCLUSIVE",
+    "EXPORT",
+    "FILE",
+    "HOLD_DDLTIME",
+    "HOUR",
+    "IDXPROPERTIES",
+    "INDEX",
+    "INDEXES",
+    "INPUTDRIVER",
+    "INPUTFORMAT",
+    "ITEMS",
+    "JAR",
+    "KEYS",
+    "KEY_TYPE",
+    "LOCK",
+    "LOCKS",
+    "LOGICAL",
+    "LONG",
+    "MAPJOIN",
+    "MATERIALIZED",
+    "MINUTE",
+    "MONTH",
+    "MSCK",
+    "NOSCAN",
+    "NO_DROP",
+    "OFFLINE",
+    "OPTION",
+    "OUTPUTDRIVER",
+    "OUTPUTFORMAT",
+    "OWNER",
+    "PLUS",
+    "PRETTY",
+    "PRINCIPALS",
+    "PROTECTION",
+    "READ",
+    "READONLY",
+    "REBUILD",
+    "RECORDREADER",
+    "RECORDWRITER",
+    "RELOAD",
+    "REPAIR",
+    "REWRITE",
+    "SECOND",
+    "SERDE",
+    "SERVER",
+    "SHARED",
+    "SHOW_DATABASE",
+    "SKEWED",
+    "SORTED",
+    "SSL",
+    "STATISTICS",
+    "STREAMTABLE",
+    "SYSTEM",
+    "TEMPORARY",
+    "TOUCH",
+    "TRANSACTIONS",
+    "UNARCHIVE",
+    "UNDO",
+    "UNIONTYPE",
+    "UNLOCK",
+    "UNSIGNED",
+    "URI",
+    "UTC",
+    "UTCTIMESTAMP",
+    "VALUE_TYPE",
+    "WHILE",
+    "YEAR",
+    "AUTOCOMMIT",
+    "ISOLATION",
+    "LEVEL",
+    "SNAPSHOT",
+    "TRANSACTION",
+    "WORK",
+    "WRITE",
+    "ABORT",
+    "KEY",
+    "DETAIL",
+    "DOW",
+    "EXPRESSION",
+    "OPERATOR",
+    "QUARTER",
+    "SUMMARY",
+    "VECTORIZATION",
+    "WEEK",
+    "YEARS",
+    "MONTHS",
+    "WEEKS",
+    "DAYS",
+    "HOURS",
+    "MINUTES",
+    "SECONDS",
+    "TIMESTAMPTZ",
+    "ZONE",
+]
--- a/test/fixtures/dialects/impala/.sqlfluff
+++ b/test/fixtures/dialects/impala/.sqlfluff
@@ -0,0 +1,2 @@
+[sqlfluff]
+dialect = impala
--- a/test/fixtures/dialects/impala/compute_stats.sql
+++ b/test/fixtures/dialects/impala/compute_stats.sql
@@ -0,0 +1,3 @@
+COMPUTE STATS db.foo;
+
+COMPUTE INCREMENTAL STATS db.foo;
--- a/test/fixtures/dialects/impala/compute_stats.yml
+++ b/test/fixtures/dialects/impala/compute_stats.yml
@@ -0,0 +1,26 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py`  to generate them after adding or
+# altering SQL files.
+_hash: 9cfb94b39d2db1240481cf4e1ed5067989948657fb1ef63d1f7ccffe85933f75
+file:
+- statement:
+    compute_stats_statement:
+    - keyword: COMPUTE
+    - keyword: STATS
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+- statement_terminator: ;
+- statement:
+    compute_stats_statement:
+    - keyword: COMPUTE
+    - keyword: INCREMENTAL
+    - keyword: STATS
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+- statement_terminator: ;
--- a/test/fixtures/dialects/impala/create_table.sql
+++ b/test/fixtures/dialects/impala/create_table.sql
@@ -0,0 +1,8 @@
+CREATE TABLE db.foo
+  (col1 integer, col2 string);
+
+CREATE TABLE db.foo (
+    col1 INT,
+    col2 STRING,
+    col3 DECIMAL(10,2)
+) PARTITIONED BY (col4 INT);
--- a/test/fixtures/dialects/impala/create_table.yml
+++ b/test/fixtures/dialects/impala/create_table.yml
@@ -0,0 +1,76 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py`  to generate them after adding or
+# altering SQL files.
+_hash: 31109752b72e9e04185357c787940f20b3a9e350e5d532c5b0cac132c1687c27
+file:
+- statement:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - bracketed:
+      - start_bracket: (
+      - column_definition:
+          naked_identifier: col1
+          data_type:
+            primitive_type:
+              keyword: integer
+      - comma: ','
+      - column_definition:
+          naked_identifier: col2
+          data_type:
+            primitive_type:
+              keyword: string
+      - end_bracket: )
+- statement_terminator: ;
+- statement:
+    create_table_statement:
+    - keyword: CREATE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - bracketed:
+      - start_bracket: (
+      - column_definition:
+          naked_identifier: col1
+          data_type:
+            primitive_type:
+              keyword: INT
+      - comma: ','
+      - column_definition:
+          naked_identifier: col2
+          data_type:
+            primitive_type:
+              keyword: STRING
+      - comma: ','
+      - column_definition:
+          naked_identifier: col3
+          data_type:
+            primitive_type:
+              keyword: DECIMAL
+              bracketed_arguments:
+                bracketed:
+                - start_bracket: (
+                - numeric_literal: '10'
+                - comma: ','
+                - numeric_literal: '2'
+                - end_bracket: )
+      - end_bracket: )
+    - keyword: PARTITIONED
+    - keyword: BY
+    - bracketed:
+        start_bracket: (
+        column_definition:
+          naked_identifier: col4
+          data_type:
+            primitive_type:
+              keyword: INT
+        end_bracket: )
+- statement_terminator: ;
--- a/test/fixtures/dialects/impala/insert_into.sql
+++ b/test/fixtures/dialects/impala/insert_into.sql
@@ -0,0 +1,13 @@
+INSERT INTO TABLE db.foo SELECT col1, col2 FROM db.foo2;
+
+INSERT INTO TABLE db.foo VALUES ((1, 'a'), (2, 'b'));
+
+INSERT INTO TABLE db.foo PARTITION (col1, col2) SELECT col1, col2, col3 FROM db.foo2;
+
+INSERT INTO TABLE db.foo PARTITION (col1=1, col2='a') SELECT col3 FROM db.foo2;
+
+INSERT INTO TABLE db.foo [SHUFFLE] SELECT col1, col2 FROM db.foo2;
+
+INSERT INTO TABLE db.foo [NOSHUFFLE] SELECT col1, col2 FROM db.foo2;
+
+INSERT INTO db.foo (col1, col2) SELECT col1, col2 FROM db.foo2 WHERE col2 > 100;
--- a/test/fixtures/dialects/impala/insert_into.yml
+++ b/test/fixtures/dialects/impala/insert_into.yml
@@ -0,0 +1,256 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py`  to generate them after adding or
+# altering SQL files.
+_hash: 268ad4ff926d91c8954231e1fe915e8da6a2c7d05aa18b4eda2c23d316e1cdeb
+file:
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - values_clause:
+        keyword: VALUES
+        bracketed:
+        - start_bracket: (
+        - expression:
+            bracketed:
+              start_bracket: (
+              numeric_literal: '1'
+              comma: ','
+              quoted_literal: "'a'"
+              end_bracket: )
+        - comma: ','
+        - expression:
+            bracketed:
+              start_bracket: (
+              numeric_literal: '2'
+              comma: ','
+              quoted_literal: "'b'"
+              end_bracket: )
+        - end_bracket: )
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - keyword: PARTITION
+    - bracketed:
+      - start_bracket: (
+      - column_reference:
+          naked_identifier: col1
+      - comma: ','
+      - column_reference:
+          naked_identifier: col2
+      - end_bracket: )
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col3
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - keyword: PARTITION
+    - bracketed:
+      - start_bracket: (
+      - column_reference:
+          naked_identifier: col1
+      - comparison_operator:
+          raw_comparison_operator: '='
+      - numeric_literal: '1'
+      - comma: ','
+      - column_reference:
+          naked_identifier: col2
+      - comparison_operator:
+          raw_comparison_operator: '='
+      - quoted_literal: "'a'"
+      - end_bracket: )
+    - select_statement:
+        select_clause:
+          keyword: SELECT
+          select_clause_element:
+            column_reference:
+              naked_identifier: col3
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - start_square_bracket: '['
+    - keyword: SHUFFLE
+    - end_square_bracket: ']'
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - start_square_bracket: '['
+    - keyword: NOSHUFFLE
+    - end_square_bracket: ']'
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: INTO
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - bracketed:
+      - start_bracket: (
+      - column_reference:
+          naked_identifier: col1
+      - comma: ','
+      - column_reference:
+          naked_identifier: col2
+      - end_bracket: )
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+        where_clause:
+          keyword: WHERE
+          expression:
+            column_reference:
+              naked_identifier: col2
+            comparison_operator:
+              raw_comparison_operator: '>'
+            numeric_literal: '100'
+- statement_terminator: ;
--- a/test/fixtures/dialects/impala/insert_overwrite.sql
+++ b/test/fixtures/dialects/impala/insert_overwrite.sql
@@ -0,0 +1,11 @@
+INSERT OVERWRITE TABLE db.foo SELECT col1, col2 FROM db.foo2;
+
+INSERT OVERWRITE TABLE db.foo PARTITION (col1, col2) SELECT col1, col2, col3 FROM db.foo2;
+
+INSERT OVERWRITE TABLE db.foo PARTITION (col1=1, col2='a') SELECT col3 FROM db.foo2;
+
+INSERT OVERWRITE TABLE db.foo [SHUFFLE] SELECT col1, col2 FROM db.foo2;
+
+INSERT OVERWRITE TABLE db.foo [NOSHUFFLE] SELECT col1, col2 FROM db.foo2;
+
+INSERT OVERWRITE TABLE db.foo IF NOT EXISTS SELECT col1, col2 FROM db.foo2;
--- a/test/fixtures/dialects/impala/insert_overwrite.yml
+++ b/test/fixtures/dialects/impala/insert_overwrite.yml
@@ -0,0 +1,214 @@
+# YML test files are auto-generated from SQL files and should not be edited by
+# hand. To help enforce this, the "hash" field in the file must match a hash
+# computed by SQLFluff when running the tests. Please run
+# `python test/generate_parse_fixture_yml.py`  to generate them after adding or
+# altering SQL files.
+_hash: 558acdaa5ca55c72355fb0172cca11fe7f35cd0f211204a892da40fda25e7251
+file:
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - keyword: PARTITION
+    - bracketed:
+      - start_bracket: (
+      - column_reference:
+          naked_identifier: col1
+      - comma: ','
+      - column_reference:
+          naked_identifier: col2
+      - end_bracket: )
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col3
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - keyword: PARTITION
+    - bracketed:
+      - start_bracket: (
+      - column_reference:
+          naked_identifier: col1
+      - comparison_operator:
+          raw_comparison_operator: '='
+      - numeric_literal: '1'
+      - comma: ','
+      - column_reference:
+          naked_identifier: col2
+      - comparison_operator:
+          raw_comparison_operator: '='
+      - quoted_literal: "'a'"
+      - end_bracket: )
+    - select_statement:
+        select_clause:
+          keyword: SELECT
+          select_clause_element:
+            column_reference:
+              naked_identifier: col3
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - start_square_bracket: '['
+    - keyword: SHUFFLE
+    - end_square_bracket: ']'
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - start_square_bracket: '['
+    - keyword: NOSHUFFLE
+    - end_square_bracket: ']'
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;
+- statement:
+    insert_statement:
+    - keyword: INSERT
+    - keyword: OVERWRITE
+    - keyword: TABLE
+    - table_reference:
+      - naked_identifier: db
+      - dot: .
+      - naked_identifier: foo
+    - keyword: IF
+    - keyword: NOT
+    - keyword: EXISTS
+    - select_statement:
+        select_clause:
+        - keyword: SELECT
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col1
+        - comma: ','
+        - select_clause_element:
+            column_reference:
+              naked_identifier: col2
+        from_clause:
+          keyword: FROM
+          from_expression:
+            from_expression_element:
+              table_expression:
+                table_reference:
+                - naked_identifier: db
+                - dot: .
+                - naked_identifier: foo2
+- statement_terminator: ;