Auto generate dialect docs (#6153)
Co-authored-by: Cameron <105471409+keraion@users.noreply.github.com>
@@ -25,6 +25,6 @@ build:
   tools:
     python: "3.11"
   jobs:
-    # Before building, generate the rule docs
+    # Before building, generate the rule & dialect docs
     pre_build:
-      - python docs/generate-rule-docs.py
+      - python docs/generate-auto-docs.py
@@ -20,5 +20,5 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	python generate-rule-docs.py
+	python generate-auto-docs.py
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -1,9 +1,24 @@
-"""Generate rule documentation automatically."""
+"""Generate some documentation automatically.
+
+This script generates partial documentation sections (i.e. the content of
+`/docs/source/_partials/`) by importing SQLFluff and extracting data about
+rules and dialects.
+
+It should run before every docs generation so that those partial .rst files
+can then be correctly referenced by other sections of the docs. For example
+this file builds the file `/docs/source/_partials/rule_summaries.rst`, which
+is then inserted into `/docs/source/reference/rules.rst` using the directive
+`.. include:: ../_partials/rule_summaries.rst`.
+
+This script is referenced in the `Makefile` and the `make.bat` file to ensure
+it is run at the appropriate moment.
+"""

 import json
 from collections import defaultdict
 from pathlib import Path

 import sqlfluff
 from sqlfluff.core.plugin.host import get_plugin_manager

 base_path = Path(__file__).parent.absolute()
@@ -110,3 +125,31 @@ with open(base_path / "source/_partials/rule_summaries.rst", "w", encoding="utf8
     f.write("\n\n")

 print("Rule Docs Generation: Done")
+
+# Extract all the dialects.
+print("Dialect Docs Generation: Reading Dialects...")
+# We make a dictionary of all of them first, because we want to force the ANSI
+# one to be first.
+dialect_dict = {dialect.label: dialect for dialect in sqlfluff.list_dialects()}
+dialect_list = [dialect_dict["ansi"]] + [
+    dialect for dialect_name, dialect in dialect_dict.items() if dialect_name != "ansi"
+]
+
+# Write each of the summary files.
+print("Dialect Docs Generation: Writing Dialect Summaries...")
+with open(
+    base_path / "source/_partials/dialect_summaries.rst", "w", encoding="utf8"
+) as f:
+    f.write(autogen_header)
+    for dialect in dialect_list:
+        f.write(
+            f".. _{dialect.label}_dialect_ref:\n\n"
+            f"{dialect.name}\n{'-' * len(dialect.name)}\n\n"
+            f"**Label**: ``{dialect.label}``\n\n"
+        )
+        if dialect.label != "ansi":
+            f.write(
+                f"**Inherits from**: :ref:`{dialect.inherits_from}_dialect_ref`\n\n"
+            )
+        if dialect.docstring:
+            f.write(dialect.docstring + "\n\n")
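Side note: the partial that this script writes can also be previewed straight from the Python API. A minimal sketch, assuming only that a SQLFluff build containing this commit is importable (the field names match the `DialectTuple` change further down):

# Print one block per dialect, mirroring the generated
# `dialect_summaries.rst` structure written by the script above.
import sqlfluff

for dialect in sqlfluff.list_dialects():
    print(f".. _{dialect.label}_dialect_ref:\n")
    print(f"{dialect.name}\n{'-' * len(dialect.name)}\n")
    print(f"**Label**: ``{dialect.label}``\n")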
@@ -25,8 +25,8 @@ if errorlevel 9009 (
 	exit /b 1
 )

-REM Generate the rule docs
-python generate-rule-docs.py
+REM Generate the rule & dialect docs
+python generate-auto-docs.py

 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
docs/source/_partials/.gitignore
@@ -1,3 +1,4 @@
 rule_table.rst
 rule_summaries.rst
 rule_list.json
+dialect_summaries.rst

@@ -6,4 +6,4 @@ Some of those files are also auto-generated by scripts,
 in which case they should be included in the `.gitignore`
 and not edited by hand.

-See [generate-rule-docs.py](https://github.com/sqlfluff/sqlfluff/blob/main/docs/generate-rule-docs.py) for more info.
+See [generate-auto-docs.py](https://github.com/sqlfluff/sqlfluff/blob/main/docs/generate-auto-docs.py) for more info.
@@ -36,241 +36,4 @@ current dialects available on your installation of SQLFluff.
 - Will the feature I'm adding break any *downstream* dependencies
   within dialects which inherit from this one?

-.. _ansi_dialect_ref:
-
-ANSI
-----
-
-This is the base dialect which holds most of the definitions of common
-SQL commands and structures. If the dialect which you're actually using
-isn't specifically implemented by SQLFluff, using this dialect is a good
-place to start.
-
-This dialect doesn't intend to be brutal in adhering to (and only to) the
-ANSI SQL spec *(mostly because ANSI charges for access to that spec)*. It aims
-to be a representation of vanilla SQL before any other project adds their
-spin to it, and so may contain a slightly wider set of functions than actually
-available in true ANSI SQL.
-
-.. _athena_dialect_ref:
-
-Athena
---------
-
-The dialect for `Amazon Athena`_.
-
-.. _`Amazon Athena`: https://aws.amazon.com/athena/
-
-.. _bigquery_dialect_ref:
-
-BigQuery
---------
-
-The dialect for `Google BigQuery`_.
-
-.. _`Google BigQuery`: https://cloud.google.com/bigquery/
-
-.. _clickhouse_dialect_ref:
-
-ClickHouse
-----------
-
-The dialect for `ClickHouse`_.
-
-.. _`ClickHouse`: https://clickhouse.com/
-
-.. _databricks_dialect_ref:
-
-Databricks
-----------
-
-The dialect `Databricks`_.
-
-.. _`Databricks`: https://databricks.com/
-
-.. _db2_dialect_ref:
-
-Db2
-------
-
-The dialect for `Db2`_.
-
-.. _`Db2`: https://www.ibm.com/analytics/db2
-
-.. _duck_dialect_ref:
-
-DuckDB
-------
-
-The dialect for `DuckDB`_.
-
-.. _`DuckDB`: https://duckdb.org/
-
-
-.. _exasol_dialect_ref:
-
-Exasol
-------
-
-The dialect for `Exasol`_.
-
-.. _`Exasol`: https://www.exasol.com/
-
-.. _hive_dialect_ref:
-
-Greenplum
----------
-
-The dialect for `Greenplum`_.
-
-.. _`Greenplum`: https://www.greenplum.org/
-
-.. _greens_dialect_ref:
-
-Hive
-----
-
-The dialect for `Hive`_.
-
-.. _`Hive`: https://hive.apache.org/
-
-.. _materialize_dialect_ref:
-
-Materialize
------------
-
-The dialect for `Materialize`_.
-
-.. _`Materialize`: https://materialize.com/
-
-.. _mariadb_dialect_ref:
-
-MariaDB
--------
-
-The dialect for `MariaDB`_.
-
-.. _`MariaDB`: https://www.mariadb.org/
-
-.. _mysql_dialect_ref:
-
-MySQL
------
-
-The dialect for `MySQL`_.
-
-.. _`MySQL`: https://www.mysql.com/
-
-.. _oracle_dialect_ref:
-
-Oracle
-------
-
-The dialect for `Oracle`_ SQL. Note: this does not include PL/SQL.
-
-.. _`Oracle`: https://www.oracle.com/database/technologies/appdev/sql.html
-
-.. _postgres_dialect_ref:
-
-PostgreSQL
-----------
-
-This is based around the `PostgreSQL spec`_. Many other SQL instances are often
-based on PostreSQL syntax. If you're running an unsupported dialect, then
-this is often the dialect to use (until someone makes a specific dialect).
-
-.. _`PostgreSQL spec`: https://www.postgresql.org/docs/9.6/reference.html
-
-.. _redshift_dialect_ref:
-
-Redshift
-----------
-
-
-The dialect for `Amazon Redshift`_.
-
-.. _`Amazon Redshift`: https://aws.amazon.com/redshift/
-
-.. _snowflake_dialect_ref:
-
-Snowflake
----------
-
-The dialect for `Snowflake`_, which has much of its syntax
-inherited from :ref:`postgres_dialect_ref`.
-
-.. _`Snowflake`: https://docs.snowflake.com/en/sql-reference.html
-
-.. _soql_dialect_ref:
-
-SOQL
-----
-
-The dialect for `SOQL`_ (Salesforce Object Query Language).
-
-.. _`SOQL`: https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql.htm
-
-.. _sparksql_dialect_ref:
-
-SparkSQL
---------
-
-The dialect for Apache `Spark SQL`_. It inherits from :ref:`ansi_dialect_ref`
-and includes relevant syntax from :ref:`hive_dialect_ref` for commands that
-permit Hive Format. Spark SQL extensions provided by the `Delta Lake`_ project
-are also implemented in this dialect.
-
-This implementation focuses on the `Ansi Compliant Mode`_ introduced in
-Spark3, instead of being Hive Compliant. The introduction of ANSI Compliance
-provides better data quality and easier migration from traditional DBMS.
-
-Versions of Spark prior to 3.x will only support the Hive dialect.
-
-.. _`Spark SQL`: https://spark.apache.org/docs/latest/sql-ref.html
-.. _`Delta Lake`: https://docs.delta.io/latest/quick-start.html#set-up-apache-spark-with-delta-lake
-.. _`Ansi Compliant Mode`: https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html
-
-.. _sqlite_dialect_ref:
-
-SQLite
-------
-
-The dialect for `SQLite`_.
-
-.. _`SQLite`: https://www.sqlite.org/
-
-.. _tsql_dialect_ref:
-
-T-SQL
------
-
-The dialect for `T-SQL`_ (aka Transact-SQL).
-
-.. _`T-SQL`: https://docs.microsoft.com/en-us/sql/t-sql/language-reference
-
-.. _teradata_dialect_ref:
-
-Teradata
---------
-
-The dialect for `Teradata`_.
-
-.. _`Teradata`: https://www.teradata.co.uk/
-
-.. _trino_dialect_ref:
-
-Trino
---------
-
-The dialect for `Trino`_.
-
-.. _`Trino`: https://trino.io/docs/current/
-
-.. _vertica_dialect_ref:
-
-Vertica
---------
-
-The dialect for `Vertica`_.
-
-.. _`Vertica`: https://www.vertica.com/documentation/vertica/all/
+.. include:: ../_partials/dialect_summaries.rst
@@ -78,6 +78,7 @@ class DialectTuple(NamedTuple):
     label: str
     name: str
     inherits_from: str
+    docstring: str


 def dialect_readout() -> Iterator[DialectTuple]:
@@ -86,8 +87,9 @@ def dialect_readout() -> Iterator[DialectTuple]:
         dialect = load_raw_dialect(dialect_label)
         yield DialectTuple(
             label=dialect_label,
-            name=dialect.name,
+            name=dialect.formatted_name,
             inherits_from=dialect.inherits_from or "nothing",
+            docstring=dialect.docstring,
         )
@@ -33,6 +33,8 @@ class Dialect:
         library: Optional[Dict[str, DialectElementType]] = None,
         sets: Optional[Dict[str, Set[Union[str, BracketPairTuple]]]] = None,
         inherits_from: Optional[str] = None,
+        formatted_name: Optional[str] = None,
+        docstring: Optional[str] = None,
     ) -> None:
         self._library = library or {}
         self.name = name
@@ -41,6 +43,9 @@ class Dialect:
         self._sets = sets or {}
         self.inherits_from = inherits_from
         self.root_segment_name = root_segment_name
+        # Attributes for documentation
+        self.formatted_name: str = formatted_name or name
+        self.docstring = docstring or f"The dialect for {self.formatted_name}."

     def __repr__(self) -> str:  # pragma: no cover
         return f"<Dialect: {self.name}>"
@@ -123,7 +128,12 @@ class Dialect:
             [n.strip().upper() for n in values.strip().split("\n")]
         )

-    def copy_as(self, name: str) -> "Dialect":
+    def copy_as(
+        self,
+        name: str,
+        formatted_name: Optional[str] = None,
+        docstring: Optional[str] = None,
+    ) -> "Dialect":
         """Copy this dialect and create a new one with a different name.

         This is the primary method for inheritance, after which, the
@@ -149,6 +159,9 @@ class Dialect:
             sets=new_sets,
             inherits_from=self.name,
             root_segment_name=self.root_segment_name,
+            # NOTE: We don't inherit the documentation fields.
+            formatted_name=formatted_name,
+            docstring=docstring,
         )

     def add(self, **kwargs: DialectElementType) -> None:
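Side note on the two fields added above: both are optional and fall back to sensible defaults, and `copy_as` deliberately resets them rather than inheriting them. A hedged sketch of that behaviour, assuming the `Dialect` class is importable from `sqlfluff.core.dialects.base` (import path not shown in this diff):

# Sketch of the documentation-field defaults added in __init__ above.
from sqlfluff.core.dialects.base import Dialect  # assumed import path

d = Dialect("example", root_segment_name="FileSegment")
assert d.formatted_name == "example"  # falls back to `name`
assert d.docstring == "The dialect for example."  # generated default

# copy_as() does NOT inherit the documentation fields (see NOTE above),
# so a copied dialect gets fresh defaults unless they are passed in.
child = d.copy_as("child")
assert child.formatted_name == "child"
assert child.docstring == "The dialect for child."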
@@ -64,7 +64,21 @@ from sqlfluff.dialects.dialect_ansi_keywords import (
     ansi_unreserved_keywords,
 )

-ansi_dialect = Dialect("ansi", root_segment_name="FileSegment")
+ansi_dialect = Dialect(
+    "ansi",
+    root_segment_name="FileSegment",
+    formatted_name="ANSI",
+    docstring="""This is the base dialect which holds most of the definitions of common
+SQL commands and structures. If the dialect which you're actually using
+isn't specifically implemented by SQLFluff, using this dialect is a good
+place to start.
+
+This dialect doesn't intend to be brutal in adhering to (and only to) the
+ANSI SQL spec *(mostly because ANSI charges for access to that spec)*. It aims
+to be a representation of vanilla SQL before any other project adds their
+spin to it, and so may contain a slightly wider set of functions than actually
+available in true ANSI SQL.""",
+)

 ansi_dialect.set_lexer_matchers(
     [
@@ -36,7 +36,12 @@ from sqlfluff.dialects.dialect_athena_keywords import (

 ansi_dialect = load_raw_dialect("ansi")

-athena_dialect = ansi_dialect.copy_as("athena")
+athena_dialect = ansi_dialect.copy_as(
+    "athena",
+    formatted_name="AWS Athena",
+    docstring="""The dialect for `Athena <https://aws.amazon.com/athena/>`_
+on Amazon Web Services (AWS).""",
+)

 athena_dialect.sets("unreserved_keywords").update(athena_unreserved_keywords)
 athena_dialect.sets("reserved_keywords").update(athena_reserved_keywords)

@@ -43,7 +43,12 @@ from sqlfluff.dialects.dialect_bigquery_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-bigquery_dialect = ansi_dialect.copy_as("bigquery")
+bigquery_dialect = ansi_dialect.copy_as(
+    "bigquery",
+    formatted_name="Google BigQuery",
+    docstring="""The dialect for `BigQuery <https://cloud.google.com/bigquery/>`_
+on Google Cloud Platform (GCP).""",
+)

 bigquery_dialect.insert_lexer_matchers(
     # JSON Operators: https://www.postgresql.org/docs/9.5/functions-json.html
@@ -40,7 +40,11 @@ from sqlfluff.dialects.dialect_clickhouse_keywords import (

 ansi_dialect = load_raw_dialect("ansi")

-clickhouse_dialect = ansi_dialect.copy_as("clickhouse")
+clickhouse_dialect = ansi_dialect.copy_as(
+    "clickhouse",
+    formatted_name="ClickHouse",
+    docstring="The dialect for `ClickHouse <https://clickhouse.com/>`_.",
+)
 clickhouse_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)

 clickhouse_dialect.insert_lexer_matchers(

@@ -34,7 +34,11 @@ from sqlfluff.dialects.dialect_databricks_keywords import (
 )

 sparksql_dialect = load_raw_dialect("sparksql")
-databricks_dialect = sparksql_dialect.copy_as("databricks")
+databricks_dialect = sparksql_dialect.copy_as(
+    "databricks",
+    formatted_name="Databricks",
+    docstring="The dialect for `Databricks <https://databricks.com/>`_.",
+)

 databricks_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)
 databricks_dialect.sets("unreserved_keywords").update(
@@ -34,7 +34,11 @@ from sqlfluff.dialects.dialect_db2_keywords import UNRESERVED_KEYWORDS

 ansi_dialect = load_raw_dialect("ansi")

-db2_dialect = ansi_dialect.copy_as("db2")
+db2_dialect = ansi_dialect.copy_as(
+    "db2",
+    formatted_name="IBM Db2",
+    docstring="The dialect for IBM `Db2 <https://www.ibm.com/analytics/db2>`_.",
+)
 db2_dialect.sets("reserved_keywords").remove("NATURAL")
 db2_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS)

@@ -31,7 +31,11 @@ from sqlfluff.dialects import dialect_postgres as postgres

 ansi_dialect = load_raw_dialect("ansi")
 postgres_dialect = load_raw_dialect("postgres")
-duckdb_dialect = postgres_dialect.copy_as("duckdb")
+duckdb_dialect = postgres_dialect.copy_as(
+    "duckdb",
+    formatted_name="DuckDB",
+    docstring="The dialect for `DuckDB <https://duckdb.org/>`_.",
+)

 duckdb_dialect.sets("reserved_keywords").update(
     [
@@ -44,7 +44,11 @@ from sqlfluff.dialects.dialect_exasol_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-exasol_dialect = ansi_dialect.copy_as("exasol")
+exasol_dialect = ansi_dialect.copy_as(
+    "exasol",
+    formatted_name="Exasol",
+    docstring="The dialect for `Exasol <https://www.exasol.com/>`_.",
+)

 # Clear ANSI Keywords and add all EXASOL keywords
 exasol_dialect.sets("unreserved_keywords").clear()

@@ -27,7 +27,11 @@ from sqlfluff.dialects.dialect_postgres_keywords import get_keywords

 postgres_dialect = load_raw_dialect("postgres")

-greenplum_dialect = postgres_dialect.copy_as("greenplum")
+greenplum_dialect = postgres_dialect.copy_as(
+    "greenplum",
+    formatted_name="Greenplum",
+    docstring="The dialect for `Greenplum <https://www.greenplum.org/>`_.",
+)

 greenplum_dialect.sets("reserved_keywords").update(
     get_keywords(greenplum_keywords, "reserved")
@@ -31,7 +31,11 @@ from sqlfluff.dialects.dialect_hive_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-hive_dialect = ansi_dialect.copy_as("hive")
+hive_dialect = ansi_dialect.copy_as(
+    "hive",
+    formatted_name="Apache Hive",
+    docstring="The dialect for Apache `Hive <https://hive.apache.org/>`_.",
+)

 # Clear ANSI Keywords and add all Hive keywords
 # Commented clearing for now as some are needed for some statements imported

@@ -26,7 +26,11 @@ from sqlfluff.dialects.dialect_mariadb_keywords import (

 # ansi_dialect = load_raw_dialect("ansi")
 mysql_dialect = load_raw_dialect("mysql")
-mariadb_dialect = mysql_dialect.copy_as("mariadb")
+mariadb_dialect = mysql_dialect.copy_as(
+    "mariadb",
+    formatted_name="MariaDB",
+    docstring="The dialect for `MariaDB <https://www.mariadb.org/>`_.",
+)
 mariadb_dialect.update_keywords_set_from_multiline_string(
     "unreserved_keywords", mariadb_unreserved_keywords
 )
@@ -24,7 +24,11 @@ from sqlfluff.dialects.dialect_materialize_keywords import (

 postgres_dialect = load_raw_dialect("postgres")

-materialize_dialect = postgres_dialect.copy_as("materialize")
+materialize_dialect = postgres_dialect.copy_as(
+    "materialize",
+    formatted_name="Materialize",
+    docstring="The dialect for `Materialize <https://materialize.com/>`_.",
+)
 materialize_dialect.update_keywords_set_from_multiline_string(
     "unreserved_keywords", materialize_unreserved_keywords
 )

@@ -41,7 +41,11 @@ from sqlfluff.dialects.dialect_mysql_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-mysql_dialect = ansi_dialect.copy_as("mysql")
+mysql_dialect = ansi_dialect.copy_as(
+    "mysql",
+    formatted_name="MySQL",
+    docstring="The dialect for `MySQL <https://www.mysql.com/>`_.",
+)

 mysql_dialect.patch_lexer_matchers(
     [
@@ -38,7 +38,13 @@ from sqlfluff.core.parser import (
 from sqlfluff.dialects import dialect_ansi as ansi

 ansi_dialect = load_raw_dialect("ansi")
-oracle_dialect = ansi_dialect.copy_as("oracle")
+oracle_dialect = ansi_dialect.copy_as(
+    "oracle",
+    formatted_name="Oracle",
+    docstring="""The dialect for `Oracle`_ SQL. Note: this does not include PL/SQL.
+
+.. _`Oracle`: https://www.oracle.com/database/technologies/appdev/sql.html""",
+)

 oracle_dialect.sets("unreserved_keywords").difference_update(["COMMENT"])
 oracle_dialect.sets("reserved_keywords").update(

@@ -44,7 +44,16 @@ from sqlfluff.dialects.dialect_postgres_keywords import (

 ansi_dialect = load_raw_dialect("ansi")

-postgres_dialect = ansi_dialect.copy_as("postgres")
+postgres_dialect = ansi_dialect.copy_as(
+    "postgres",
+    formatted_name="PostgreSQL",
+    docstring="""This is based around the `PostgreSQL spec`_. Many other SQL
+dialects are often based on the PostreSQL syntax. If you're running an unsupported
+dialect, then this is often the dialect to use (until someone makes a specific
+dialect).
+
+.. _`PostgreSQL spec`: https://www.postgresql.org/docs/current/reference.html""",
+)

 postgres_dialect.insert_lexer_matchers(
     # JSON Operators: https://www.postgresql.org/docs/9.5/functions-json.html
@@ -35,7 +35,12 @@ from sqlfluff.dialects.dialect_redshift_keywords import (

 postgres_dialect = load_raw_dialect("postgres")
 ansi_dialect = load_raw_dialect("ansi")
-redshift_dialect = postgres_dialect.copy_as("redshift")
+redshift_dialect = postgres_dialect.copy_as(
+    "redshift",
+    formatted_name="AWS Redshift",
+    docstring="""The dialect for `Redshift <https://aws.amazon.com/redshift/>`_
+on Amazon Web Services (AWS).""",
+)

 # Set Keywords
 redshift_dialect.sets("unreserved_keywords").clear()

@@ -42,7 +42,13 @@ from sqlfluff.dialects.dialect_snowflake_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-snowflake_dialect = ansi_dialect.copy_as("snowflake")
+snowflake_dialect = ansi_dialect.copy_as(
+    "snowflake",
+    formatted_name="Snowflake",
+    docstring="""The dialect for
+`Snowflake <https://docs.snowflake.com/en/sql-reference.html>`_,
+which has much of its syntax inherited from :ref:`postgres_dialect_ref`.""",
+)

 snowflake_dialect.patch_lexer_matchers(
     [
@@ -18,7 +18,15 @@ from sqlfluff.dialects import dialect_ansi as ansi

 ansi_dialect = load_raw_dialect("ansi")

-soql_dialect = ansi_dialect.copy_as("soql")
+soql_dialect = ansi_dialect.copy_as(
+    "soql",
+    formatted_name="Salesforce Object Query Language (SOQL)",
+    docstring=(
+        "The dialect for `SOQL <https://developer.salesforce.com/docs/"
+        "atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql.htm>`_ "
+        "(Salesforce Object Query Language)."
+    ),
+)

 soql_dialect.insert_lexer_matchers(
     [

@@ -52,7 +52,24 @@ from sqlfluff.dialects.dialect_sparksql_keywords import (

 ansi_dialect = load_raw_dialect("ansi")
 hive_dialect = load_raw_dialect("hive")
-sparksql_dialect = ansi_dialect.copy_as("sparksql")
+sparksql_dialect = ansi_dialect.copy_as(
+    "sparksql",
+    formatted_name="Apache Spark SQL",
+    docstring="""The dialect for Apache `Spark SQL`_. This includes relevant
+syntax from :ref:`hive_dialect_ref` for commands that permit Hive Format.
+Spark SQL extensions provided by the `Delta Lake`_ project are also implemented
+in this dialect.
+
+This implementation focuses on the `Ansi Compliant Mode`_ introduced in
+Spark3, instead of being Hive Compliant. The introduction of ANSI Compliance
+provides better data quality and easier migration from traditional DBMS.
+
+Versions of Spark prior to 3.x will only support the Hive dialect.
+
+.. _`Spark SQL`: https://spark.apache.org/docs/latest/sql-ref.html
+.. _`Delta Lake`: https://docs.delta.io/latest/quick-start.html#set-up-apache-spark-with-delta-lake
+.. _`Ansi Compliant Mode`: https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html""",  # noqa: E501
+)

 sparksql_dialect.patch_lexer_matchers(
     [
@@ -37,7 +37,11 @@ from sqlfluff.dialects.dialect_sqlite_keywords import (

 ansi_dialect = load_raw_dialect("ansi")

-sqlite_dialect = ansi_dialect.copy_as("sqlite")
+sqlite_dialect = ansi_dialect.copy_as(
+    "sqlite",
+    formatted_name="SQLite",
+    docstring="""The dialect for `SQLite <https://www.sqlite.org/>`_.""",
+)

 sqlite_dialect.sets("reserved_keywords").clear()
 sqlite_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS)

@@ -33,7 +33,11 @@ from sqlfluff.core.parser import (
 from sqlfluff.dialects import dialect_ansi as ansi

 ansi_dialect = load_raw_dialect("ansi")
-teradata_dialect = ansi_dialect.copy_as("teradata")
+teradata_dialect = ansi_dialect.copy_as(
+    "teradata",
+    formatted_name="Teradata",
+    docstring="""The dialect for `Teradata <https://www.teradata.co.uk/>`_.""",
+)

 teradata_dialect.patch_lexer_matchers(
     [
@@ -33,7 +33,11 @@ from sqlfluff.dialects.dialect_trino_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-trino_dialect = ansi_dialect.copy_as("trino")
+trino_dialect = ansi_dialect.copy_as(
+    "trino",
+    formatted_name="Trino",
+    docstring="""The dialect for `Trino <https://trino.io/docs/current/>`_.""",
+)

 # Set the bare functions: https://trino.io/docs/current/functions/datetime.html
 trino_dialect.sets("bare_functions").update(

@@ -47,7 +47,13 @@ from sqlfluff.dialects.dialect_tsql_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-tsql_dialect = ansi_dialect.copy_as("tsql")
+tsql_dialect = ansi_dialect.copy_as(
+    "tsql",
+    formatted_name="Microsoft T-SQL",
+    docstring="""The dialect for `T-SQL`_ (aka Transact-SQL).
+
+.. _`T-SQL`: https://docs.microsoft.com/en-us/sql/t-sql/language-reference""",
+)

 tsql_dialect.sets("reserved_keywords").clear()
 tsql_dialect.sets("unreserved_keywords").clear()
@@ -39,7 +39,12 @@ from sqlfluff.dialects.dialect_vertica_keywords import (
 )

 ansi_dialect = load_raw_dialect("ansi")
-vertica_dialect = ansi_dialect.copy_as("vertica")
+vertica_dialect = ansi_dialect.copy_as(
+    "vertica",
+    formatted_name="Vertica",
+    docstring="""The dialect for
+`Vertica <https://www.vertica.com/documentation/vertica/all/>`_.""",
+)

 vertica_dialect.insert_lexer_matchers(
     # Allow ::! operator as in
@@ -8,7 +8,22 @@ def test__api__info_dialects():
     """Basic linting of dialects."""
     dialects = sqlfluff.list_dialects()
     assert isinstance(dialects, list)
-    assert ("ansi", "ansi", "nothing") in dialects
+    # Turn it into a dict so we can look for items in there.
+    dialect_dict = {dialect.label: dialect for dialect in dialects}
+    # Check the ansi dialect works
+    assert "ansi" in dialect_dict
+    ansi = dialect_dict["ansi"]
+    assert ansi.label == "ansi"
+    assert ansi.name == "ANSI"
+    assert ansi.inherits_from == "nothing"
+    assert "This is the base dialect" in ansi.docstring
+    # Check one other works
+    assert "postgres" in dialect_dict
+    postgres = dialect_dict["postgres"]
+    assert postgres.label == "postgres"
+    assert postgres.name == "PostgreSQL"
+    assert postgres.inherits_from == "ansi"
+    assert "this is often the dialect to use" in postgres.docstring


 def test__api__info_rules():
tox.ini
@@ -99,9 +99,9 @@ commands = yamllint -c .yamllint .
 deps =
     -rdocs/requirements.txt
 commands =
-    # Before linting, generate the rule docs.
+    # Before linting, generate the rule & dialect docs.
     # If we don't we get import errors.
-    python {toxinidir}/docs/generate-rule-docs.py
+    python {toxinidir}/docs/generate-auto-docs.py
     doc8 {toxinidir}/docs/source --file-encoding utf8

 [testenv:docbuild]